Merge branch 'brightcove_in_page_embed' of https://github.com/remitamine/youtube...
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urllib_request,
15     compat_urlparse,
16     compat_xml_parse_error,
17 )
18 from ..utils import (
19     determine_ext,
20     ExtractorError,
21     float_or_none,
22     HEADRequest,
23     is_html,
24     orderedSet,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import (
34     BrightcoveIE,
35     BrightcoveInPageEmbedIE,
36 )
37 from .nbc import NBCSportsVPlayerIE
38 from .ooyala import OoyalaIE
39 from .rutv import RUTVIE
40 from .tvc import TVCIE
41 from .sportbox import SportBoxEmbedIE
42 from .smotri import SmotriIE
43 from .myvi import MyviIE
44 from .condenast import CondeNastIE
45 from .udn import UDNEmbedIE
46 from .senateisvp import SenateISVPIE
47 from .bliptv import BlipTVIE
48 from .svt import SVTIE
49 from .pornhub import PornHubIE
50 from .xhamster import XHamsterEmbedIE
51 from .vimeo import VimeoIE
52 from .dailymotion import DailymotionCloudIE
53 from .onionstudios import OnionStudiosIE
54 from .snagfilms import SnagFilmsEmbedIE
55 from .screenwavemedia import ScreenwaveMediaIE
56 from .mtv import MTVServicesEmbeddedIE
57
58
59 class GenericIE(InfoExtractor):
60     IE_DESC = 'Generic downloader that works on some sites'
61     _VALID_URL = r'.*'
62     IE_NAME = 'generic'
63     _TESTS = [
64         # Direct link to a video
65         {
66             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
67             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
68             'info_dict': {
69                 'id': 'trailer',
70                 'ext': 'mp4',
71                 'title': 'trailer',
72                 'upload_date': '20100513',
73             }
74         },
75         # Direct link to media delivered compressed (until Accept-Encoding is *)
76         {
77             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
78             'md5': '128c42e68b13950268b648275386fc74',
79             'info_dict': {
80                 'id': 'FictionJunction-Parallel_Hearts',
81                 'ext': 'flac',
82                 'title': 'FictionJunction-Parallel_Hearts',
83                 'upload_date': '20140522',
84             },
85             'expected_warnings': [
86                 'URL could be a direct video link, returning it as such.'
87             ]
88         },
89         # Direct download with broken HEAD
90         {
91             'url': 'http://ai-radio.org:8000/radio.opus',
92             'info_dict': {
93                 'id': 'radio',
94                 'ext': 'opus',
95                 'title': 'radio',
96             },
97             'params': {
98                 'skip_download': True,  # infinite live stream
99             },
100             'expected_warnings': [
101                 r'501.*Not Implemented'
102             ],
103         },
104         # Direct link with incorrect MIME type
105         {
106             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
107             'md5': '4ccbebe5f36706d85221f204d7eb5913',
108             'info_dict': {
109                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
110                 'id': '5_Lennart_Poettering_-_Systemd',
111                 'ext': 'webm',
112                 'title': '5_Lennart_Poettering_-_Systemd',
113                 'upload_date': '20141120',
114             },
115             'expected_warnings': [
116                 'URL could be a direct video link, returning it as such.'
117             ]
118         },
119         # RSS feed
120         {
121             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
122             'info_dict': {
123                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
124                 'title': 'Zero Punctuation',
125                 'description': 're:.*groundbreaking video review series.*'
126             },
127             'playlist_mincount': 11,
128         },
129         # RSS feed with enclosure
130         {
131             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
132             'info_dict': {
133                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
134                 'ext': 'm4v',
135                 'upload_date': '20150228',
136                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
137             }
138         },
139         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
140         {
141             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
142             'info_dict': {
143                 'id': 'smil',
144                 'ext': 'mp4',
145                 'title': 'Automatics, robotics and biocybernetics',
146                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
147                 'upload_date': '20130627',
148                 'formats': 'mincount:16',
149                 'subtitles': 'mincount:1',
150             },
151             'params': {
152                 'force_generic_extractor': True,
153                 'skip_download': True,
154             },
155         },
156         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
157         {
158             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
159             'info_dict': {
160                 'id': 'hds',
161                 'ext': 'flv',
162                 'title': 'hds',
163                 'formats': 'mincount:1',
164             },
165             'params': {
166                 'skip_download': True,
167             },
168         },
169         # SMIL from https://www.restudy.dk/video/play/id/1637
170         {
171             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
172             'info_dict': {
173                 'id': 'video_1637',
174                 'ext': 'flv',
175                 'title': 'video_1637',
176                 'formats': 'mincount:3',
177             },
178             'params': {
179                 'skip_download': True,
180             },
181         },
182         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
183         {
184             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
185             'info_dict': {
186                 'id': 'smil-service',
187                 'ext': 'flv',
188                 'title': 'smil-service',
189                 'formats': 'mincount:1',
190             },
191             'params': {
192                 'skip_download': True,
193             },
194         },
195         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
196         {
197             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
198             'info_dict': {
199                 'id': '4719370',
200                 'ext': 'mp4',
201                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
202                 'formats': 'mincount:3',
203             },
204             'params': {
205                 'skip_download': True,
206             },
207         },
208         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
209         {
210             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
211             'info_dict': {
212                 'id': 'mZlp2ctYIUEB',
213                 'ext': 'mp4',
214                 'title': 'Tikibad ontruimd wegens brand',
215                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
216                 'thumbnail': 're:^https?://.*\.jpg$',
217                 'duration': 33,
218             },
219             'params': {
220                 'skip_download': True,
221             },
222         },
223         # google redirect
224         {
225             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
226             'info_dict': {
227                 'id': 'cmQHVoWB5FY',
228                 'ext': 'mp4',
229                 'upload_date': '20130224',
230                 'uploader_id': 'TheVerge',
231                 'description': 're:^Chris Ziegler takes a look at the\.*',
232                 'uploader': 'The Verge',
233                 'title': 'First Firefox OS phones side-by-side',
234             },
235             'params': {
236                 'skip_download': False,
237             }
238         },
239         {
240             # redirect in Refresh HTTP header
241             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
242             'info_dict': {
243                 'id': 'pO8h3EaFRdo',
244                 'ext': 'mp4',
245                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
246                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
247                 'upload_date': '20150917',
248                 'uploader_id': 'brtvofficial',
249                 'uploader': 'Boiler Room',
250             },
251             'params': {
252                 'skip_download': False,
253             },
254         },
255         {
256             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
257             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
258             'info_dict': {
259                 'id': '13601338388002',
260                 'ext': 'mp4',
261                 'uploader': 'www.hodiho.fr',
262                 'title': 'R\u00e9gis plante sa Jeep',
263             }
264         },
265         # bandcamp page with custom domain
266         {
267             'add_ie': ['Bandcamp'],
268             'url': 'http://bronyrock.com/track/the-pony-mash',
269             'info_dict': {
270                 'id': '3235767654',
271                 'ext': 'mp3',
272                 'title': 'The Pony Mash',
273                 'uploader': 'M_Pallante',
274             },
275             'skip': 'There is a limit of 200 free downloads / month for the test song',
276         },
277         # embedded brightcove video
278         # it also tests brightcove videos that need to set the 'Referer' in the
279         # http requests
280         {
281             'add_ie': ['Brightcove'],
282             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
283             'info_dict': {
284                 'id': '2765128793001',
285                 'ext': 'mp4',
286                 'title': 'Le cours de bourse : l’analyse technique',
287                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
288                 'uploader': 'BFM BUSINESS',
289             },
290             'params': {
291                 'skip_download': True,
292             },
293         },
294         {
295             # https://github.com/rg3/youtube-dl/issues/2253
296             'url': 'http://bcove.me/i6nfkrc3',
297             'md5': '0ba9446db037002366bab3b3eb30c88c',
298             'info_dict': {
299                 'id': '3101154703001',
300                 'ext': 'mp4',
301                 'title': 'Still no power',
302                 'uploader': 'thestar.com',
303                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
304             },
305             'add_ie': ['Brightcove'],
306         },
307         {
308             'url': 'http://www.championat.com/video/football/v/87/87499.html',
309             'md5': 'fb973ecf6e4a78a67453647444222983',
310             'info_dict': {
311                 'id': '3414141473001',
312                 'ext': 'mp4',
313                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
314                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
315                 'uploader': 'Championat',
316             },
317         },
318         {
319             # https://github.com/rg3/youtube-dl/issues/3541
320             'add_ie': ['Brightcove'],
321             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
322             'info_dict': {
323                 'id': '3866516442001',
324                 'ext': 'mp4',
325                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
326                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
327                 'uploader': 'SBS Broadcasting',
328             },
329             'skip': 'Restricted to Netherlands',
330             'params': {
331                 'skip_download': True,  # m3u8 download
332             },
333         },
334         # ooyala video
335         {
336             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
337             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
338             'info_dict': {
339                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
340                 'ext': 'mp4',
341                 'title': '2cc213299525360.mov',  # that's what we get
342             },
343             'add_ie': ['Ooyala'],
344         },
345         {
346             # ooyala video embedded with http://player.ooyala.com/iframe.js
347             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
348             'info_dict': {
349                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
350                 'ext': 'mp4',
351                 'title': '"Steve Jobs: Man in the Machine" trailer',
352                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
353             },
354             'params': {
355                 'skip_download': True,
356             },
357         },
358         # multiple ooyala embeds on SBN network websites
359         {
360             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
361             'info_dict': {
362                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
363                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
364             },
365             'playlist_mincount': 3,
366             'params': {
367                 'skip_download': True,
368             },
369             'add_ie': ['Ooyala'],
370         },
371         # embed.ly video
372         {
373             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
374             'info_dict': {
375                 'id': '9ODmcdjQcHQ',
376                 'ext': 'mp4',
377                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
378                 'upload_date': '20140225',
379                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
380                 'uploader': 'Tested',
381                 'uploader_id': 'testedcom',
382             },
383             # No need to test YoutubeIE here
384             'params': {
385                 'skip_download': True,
386             },
387         },
388         # funnyordie embed
389         {
390             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
391             'info_dict': {
392                 'id': '18e820ec3f',
393                 'ext': 'mp4',
394                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
395                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
396             },
397         },
398         # RUTV embed
399         {
400             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
401             'info_dict': {
402                 'id': '776940',
403                 'ext': 'mp4',
404                 'title': 'Охотское море стало целиком российским',
405                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
406             },
407             'params': {
408                 # m3u8 download
409                 'skip_download': True,
410             },
411         },
412         # TVC embed
413         {
414             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
415             'info_dict': {
416                 'id': '55304',
417                 'ext': 'mp4',
418                 'title': 'Дошкольное воспитание',
419             },
420         },
421         # SportBox embed
422         {
423             'url': 'http://www.vestifinance.ru/articles/25753',
424             'info_dict': {
425                 'id': '25753',
426                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
427             },
428             'playlist': [{
429                 'info_dict': {
430                     'id': '370908',
431                     'title': 'Госзаказ. День 3',
432                     'ext': 'mp4',
433                 }
434             }, {
435                 'info_dict': {
436                     'id': '370905',
437                     'title': 'Госзаказ. День 2',
438                     'ext': 'mp4',
439                 }
440             }, {
441                 'info_dict': {
442                     'id': '370902',
443                     'title': 'Госзаказ. День 1',
444                     'ext': 'mp4',
445                 }
446             }],
447             'params': {
448                 # m3u8 download
449                 'skip_download': True,
450             },
451         },
452         # Myvi.ru embed
453         {
454             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
455             'info_dict': {
456                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
457                 'ext': 'mp4',
458                 'title': 'Ужастики, русский трейлер (2015)',
459                 'thumbnail': 're:^https?://.*\.jpg$',
460                 'duration': 153,
461             }
462         },
463         # XHamster embed
464         {
465             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
466             'info_dict': {
467                 'id': 'showthread',
468                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
469             },
470             'playlist_mincount': 7,
471         },
472         # Embedded TED video
473         {
474             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
475             'md5': '65fdff94098e4a607385a60c5177c638',
476             'info_dict': {
477                 'id': '1969',
478                 'ext': 'mp4',
479                 'title': 'Hidden miracles of the natural world',
480                 'uploader': 'Louie Schwartzberg',
481                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
482             }
483         },
484         # Embedded Ustream video
485         {
486             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
487             'md5': '27b99cdb639c9b12a79bca876a073417',
488             'info_dict': {
489                 'id': '45734260',
490                 'ext': 'flv',
491                 'uploader': 'AU SPA:  The NSA and Privacy',
492                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
493             }
494         },
495         # nowvideo embed hidden behind percent encoding
496         {
497             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
498             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
499             'info_dict': {
500                 'id': '06e53103ca9aa',
501                 'ext': 'flv',
502                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
503                 'description': 'No description',
504             },
505         },
506         # arte embed
507         {
508             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
509             'md5': '7653032cbb25bf6c80d80f217055fa43',
510             'info_dict': {
511                 'id': '048195-004_PLUS7-F',
512                 'ext': 'flv',
513                 'title': 'X:enius',
514                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
515                 'upload_date': '20140320',
516             },
517             'params': {
518                 'skip_download': 'Requires rtmpdump'
519             }
520         },
521         # francetv embed
522         {
523             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
524             'info_dict': {
525                 'id': 'EV_30231',
526                 'ext': 'mp4',
527                 'title': 'Alcaline, le concert avec Calogero',
528                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
529                 'upload_date': '20150226',
530                 'timestamp': 1424989860,
531                 'duration': 5400,
532             },
533             'params': {
534                 # m3u8 downloads
535                 'skip_download': True,
536             },
537             'expected_warnings': [
538                 'Forbidden'
539             ]
540         },
541         # Condé Nast embed
542         {
543             'url': 'http://www.wired.com/2014/04/honda-asimo/',
544             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
545             'info_dict': {
546                 'id': '53501be369702d3275860000',
547                 'ext': 'mp4',
548                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
549             }
550         },
551         # Dailymotion embed
552         {
553             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
554             'md5': '441aeeb82eb72c422c7f14ec533999cd',
555             'info_dict': {
556                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
557                 'ext': 'mp4',
558                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
559                 'uploader': 'Spi0n',
560             },
561             'add_ie': ['Dailymotion'],
562         },
563         # YouTube embed
564         {
565             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
566             'info_dict': {
567                 'id': 'FXRb4ykk4S0',
568                 'ext': 'mp4',
569                 'title': 'The NBL Auction 2014',
570                 'uploader': 'BADMINTON England',
571                 'uploader_id': 'BADMINTONEvents',
572                 'upload_date': '20140603',
573                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
574             },
575             'add_ie': ['Youtube'],
576             'params': {
577                 'skip_download': True,
578             }
579         },
580         # MTVServices embed
581         {
582             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
583             'md5': '35727f82f58c76d996fc188f9755b0d5',
584             'info_dict': {
585                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
586                 'ext': 'mp4',
587                 'title': 'Review',
588                 'description': 'Mario\'s life in the fast lane has never looked so good.',
589             },
590         },
591         # YouTube embed via <data-embed-url="">
592         {
593             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
594             'info_dict': {
595                 'id': '4vAffPZIT44',
596                 'ext': 'mp4',
597                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
598                 'uploader': 'Gameloft',
599                 'uploader_id': 'gameloft',
600                 'upload_date': '20140828',
601                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
602             },
603             'params': {
604                 'skip_download': True,
605             }
606         },
607         # Camtasia studio
608         {
609             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
610             'playlist': [{
611                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
612                 'info_dict': {
613                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
614                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
615                     'ext': 'flv',
616                     'duration': 2235.90,
617                 }
618             }, {
619                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
620                 'info_dict': {
621                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
622                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
623                     'ext': 'flv',
624                     'duration': 2235.93,
625                 }
626             }],
627             'info_dict': {
628                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
629             }
630         },
631         # Flowplayer
632         {
633             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
634             'md5': '9d65602bf31c6e20014319c7d07fba27',
635             'info_dict': {
636                 'id': '5123ea6d5e5a7',
637                 'ext': 'mp4',
638                 'age_limit': 18,
639                 'uploader': 'www.handjobhub.com',
640                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
641             }
642         },
643         # Multiple brightcove videos
644         # https://github.com/rg3/youtube-dl/issues/2283
645         {
646             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
647             'info_dict': {
648                 'id': 'always-never',
649                 'title': 'Always / Never - The New Yorker',
650             },
651             'playlist_count': 3,
652             'params': {
653                 'extract_flat': False,
654                 'skip_download': True,
655             }
656         },
657         # MLB embed
658         {
659             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
660             'md5': '96f09a37e44da40dd083e12d9a683327',
661             'info_dict': {
662                 'id': '33322633',
663                 'ext': 'mp4',
664                 'title': 'Ump changes call to ball',
665                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
666                 'duration': 48,
667                 'timestamp': 1401537900,
668                 'upload_date': '20140531',
669                 'thumbnail': 're:^https?://.*\.jpg$',
670             },
671         },
672         # Wistia embed
673         {
674             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
675             'md5': '8788b683c777a5cf25621eaf286d0c23',
676             'info_dict': {
677                 'id': '1cfaf6b7ea',
678                 'ext': 'mov',
679                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
680                 'duration': 643.0,
681                 'filesize': 182808282,
682                 'uploader': 'education-portal.com',
683             },
684         },
685         {
686             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
687             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
688             'info_dict': {
689                 'id': 'uxjb0lwrcz',
690                 'ext': 'mp4',
691                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
692                 'duration': 1715.0,
693                 'uploader': 'thoughtworks.wistia.com',
694             },
695         },
696         # Soundcloud embed
697         {
698             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
699             'info_dict': {
700                 'id': '174391317',
701                 'ext': 'mp3',
702                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
703                 'uploader': 'Sophos Security',
704                 'title': 'Chet Chat 171 - Oct 29, 2014',
705                 'upload_date': '20141029',
706             }
707         },
708         # Livestream embed
709         {
710             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
711             'info_dict': {
712                 'id': '67864563',
713                 'ext': 'flv',
714                 'upload_date': '20141112',
715                 'title': 'Rosetta #CometLanding webcast HL 10',
716             }
717         },
718         # LazyYT
719         {
720             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
721             'info_dict': {
722                 'id': '1986',
723                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
724             },
725             'playlist_mincount': 2,
726         },
727         # Cinchcast embed
728         {
729             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
730             'info_dict': {
731                 'id': '7141703',
732                 'ext': 'mp3',
733                 'upload_date': '20141126',
734                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
735             }
736         },
737         # Cinerama player
738         {
739             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
740             'info_dict': {
741                 'id': '730m_DandD_1901_512k',
742                 'ext': 'mp4',
743                 'uploader': 'www.abc.net.au',
744                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
745             }
746         },
747         # embedded viddler video
748         {
749             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
750             'info_dict': {
751                 'id': '4d03aad9',
752                 'ext': 'mp4',
753                 'uploader': 'deadspin',
754                 'title': 'WALL-TO-GORTAT',
755                 'timestamp': 1422285291,
756                 'upload_date': '20150126',
757             },
758             'add_ie': ['Viddler'],
759         },
760         # Libsyn embed
761         {
762             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
763             'info_dict': {
764                 'id': '3377616',
765                 'ext': 'mp3',
766                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
767                 'description': 'md5:601cb790edd05908957dae8aaa866465',
768                 'upload_date': '20150220',
769             },
770         },
771         # jwplayer YouTube
772         {
773             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
774             'info_dict': {
775                 'id': 'Mrj4DVp2zeA',
776                 'ext': 'mp4',
777                 'upload_date': '20150212',
778                 'uploader': 'The National Archives UK',
779                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
780                 'uploader_id': 'NationalArchives08',
781                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
782             },
783         },
784         # rtl.nl embed
785         {
786             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
787             'playlist_mincount': 5,
788             'info_dict': {
789                 'id': 'aanslagen-kopenhagen',
790                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
791             }
792         },
793         # Zapiks embed
794         {
795             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
796             'info_dict': {
797                 'id': '118046',
798                 'ext': 'mp4',
799                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
800             }
801         },
802         # Kaltura embed
803         {
804             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
805             'info_dict': {
806                 'id': '1_eergr3h1',
807                 'ext': 'mp4',
808                 'upload_date': '20150226',
809                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
810                 'timestamp': int,
811                 'title': 'John Carlson Postgame 2/25/15',
812             },
813         },
814         # Kaltura embed (different embed code)
815         {
816             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
817             'info_dict': {
818                 'id': '1_a52wc67y',
819                 'ext': 'flv',
820                 'upload_date': '20150127',
821                 'uploader_id': 'PremierMedia',
822                 'timestamp': int,
823                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
824             },
825         },
826         # Eagle.Platform embed (generic URL)
827         {
828             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
829             'info_dict': {
830                 'id': '227304',
831                 'ext': 'mp4',
832                 'title': 'Навальный вышел на свободу',
833                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
834                 'thumbnail': 're:^https?://.*\.jpg$',
835                 'duration': 87,
836                 'view_count': int,
837                 'age_limit': 0,
838             },
839         },
840         # ClipYou (Eagle.Platform) embed (custom URL)
841         {
842             'url': 'http://muz-tv.ru/play/7129/',
843             'info_dict': {
844                 'id': '12820',
845                 'ext': 'mp4',
846                 'title': "'O Sole Mio",
847                 'thumbnail': 're:^https?://.*\.jpg$',
848                 'duration': 216,
849                 'view_count': int,
850             },
851         },
852         # Pladform embed
853         {
854             'url': 'http://muz-tv.ru/kinozal/view/7400/',
855             'info_dict': {
856                 'id': '100183293',
857                 'ext': 'mp4',
858                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
859                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
860                 'thumbnail': 're:^https?://.*\.jpg$',
861                 'duration': 694,
862                 'age_limit': 0,
863             },
864         },
865         # Playwire embed
866         {
867             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
868             'info_dict': {
869                 'id': '3519514',
870                 'ext': 'mp4',
871                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
872                 'thumbnail': 're:^https?://.*\.png$',
873                 'duration': 45.115,
874             },
875         },
876         # 5min embed
877         {
878             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
879             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
880             'info_dict': {
881                 'id': '518726732',
882                 'ext': 'mp4',
883                 'title': 'Facebook Creates "On This Day" | Crunch Report',
884             },
885         },
886         # SVT embed
887         {
888             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
889             'info_dict': {
890                 'id': '2900353',
891                 'ext': 'flv',
892                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
893                 'duration': 27,
894                 'age_limit': 0,
895             },
896         },
897         # Crooks and Liars embed
898         {
899             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
900             'info_dict': {
901                 'id': '8RUoRhRi',
902                 'ext': 'mp4',
903                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
904                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
905                 'timestamp': 1428207000,
906                 'upload_date': '20150405',
907                 'uploader': 'Heather',
908             },
909         },
910         # Crooks and Liars external embed
911         {
912             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
913             'info_dict': {
914                 'id': 'MTE3MjUtMzQ2MzA',
915                 'ext': 'mp4',
916                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
917                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
918                 'timestamp': 1265032391,
919                 'upload_date': '20100201',
920                 'uploader': 'Heather',
921             },
922         },
923         # NBC Sports vplayer embed
924         {
925             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
926             'info_dict': {
927                 'id': 'ln7x1qSThw4k',
928                 'ext': 'flv',
929                 'title': "PFT Live: New leader in the 'new-look' defense",
930                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
931             },
932         },
933         # UDN embed
934         {
935             'url': 'http://www.udn.com/news/story/7314/822787',
936             'md5': 'fd2060e988c326991037b9aff9df21a6',
937             'info_dict': {
938                 'id': '300346',
939                 'ext': 'mp4',
940                 'title': '中一中男師變性 全校師生力挺',
941                 'thumbnail': 're:^https?://.*\.jpg$',
942             }
943         },
944         # Ooyala embed
945         {
946             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
947             'info_dict': {
948                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
949                 'ext': 'mp4',
950                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
951                 'title': 'This is what separates the Excel masters from the wannabes',
952             },
953             'params': {
954                 # m3u8 downloads
955                 'skip_download': True,
956             }
957         },
958         # Contains a SMIL manifest
959         {
960             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
961             'info_dict': {
962                 'id': 'file',
963                 'ext': 'flv',
964                 'title': '+ Football: Lottery Champions League Europe',
965                 'uploader': 'www.telewebion.com',
966             },
967             'params': {
968                 # rtmpe downloads
969                 'skip_download': True,
970             }
971         },
972         # Brightcove URL in single quotes
973         {
974             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
975             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
976             'info_dict': {
977                 'id': '4255764656001',
978                 'ext': 'mp4',
979                 'title': 'SN Presents: Russell Martin, World Citizen',
980                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
981                 'uploader': 'Rogers Sportsnet',
982             },
983         },
984         # Dailymotion Cloud video
985         {
986             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
987             'md5': '49444254273501a64675a7e68c502681',
988             'info_dict': {
989                 'id': '5585de919473990de4bee11b',
990                 'ext': 'mp4',
991                 'title': 'Le débat',
992                 'thumbnail': 're:^https?://.*\.jpe?g$',
993             }
994         },
995         # OnionStudios embed
996         {
997             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
998             'info_dict': {
999                 'id': '2855',
1000                 'ext': 'mp4',
1001                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1002                 'thumbnail': 're:^https?://.*\.jpe?g$',
1003                 'uploader': 'ClickHole',
1004                 'uploader_id': 'clickhole',
1005             }
1006         },
1007         # SnagFilms embed
1008         {
1009             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1010             'info_dict': {
1011                 'id': '74849a00-85a9-11e1-9660-123139220831',
1012                 'ext': 'mp4',
1013                 'title': '#whilewewatch',
1014             }
1015         },
1016         # AdobeTVVideo embed
1017         {
1018             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1019             'md5': '43662b577c018ad707a63766462b1e87',
1020             'info_dict': {
1021                 'id': '2456',
1022                 'ext': 'mp4',
1023                 'title': 'New experience with Acrobat DC',
1024                 'description': 'New experience with Acrobat DC',
1025                 'duration': 248.667,
1026             },
1027         },
1028         # ScreenwaveMedia embed
1029         {
1030             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1031             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1032             'info_dict': {
1033                 'id': 'cinemasnob-55d26273809dd',
1034                 'ext': 'mp4',
1035                 'title': 'cinemasnob',
1036             },
1037         },
1038         # BrightcoveInPageEmbed embed
1039         {
1040             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1041             'info_dict': {
1042                 'id': '4238694884001',
1043                 'ext': 'flv',
1044                 'title': 'Tabletop: Dread, Last Thoughts',
1045                 'description': 'Tabletop: Dread, Last Thoughts',
1046                 'duration': 51690,
1047             },
1048         }
1049     ]
1050
1051     def report_following_redirect(self, new_url):
1052         """Report information extraction."""
1053         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1054
1055     def _extract_rss(self, url, video_id, doc):
1056         playlist_title = doc.find('./channel/title').text
1057         playlist_desc_el = doc.find('./channel/description')
1058         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1059
1060         entries = []
1061         for it in doc.findall('./channel/item'):
1062             next_url = xpath_text(it, 'link', fatal=False)
1063             if not next_url:
1064                 enclosure_nodes = it.findall('./enclosure')
1065                 for e in enclosure_nodes:
1066                     next_url = e.attrib.get('url')
1067                     if next_url:
1068                         break
1069
1070             if not next_url:
1071                 continue
1072
1073             entries.append({
1074                 '_type': 'url',
1075                 'url': next_url,
1076                 'title': it.find('title').text,
1077             })
1078
1079         return {
1080             '_type': 'playlist',
1081             'id': url,
1082             'title': playlist_title,
1083             'description': playlist_desc,
1084             'entries': entries,
1085         }
1086
1087     def _extract_camtasia(self, url, video_id, webpage):
1088         """ Returns None if no camtasia video can be found. """
1089
1090         camtasia_cfg = self._search_regex(
1091             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1092             webpage, 'camtasia configuration file', default=None)
1093         if camtasia_cfg is None:
1094             return None
1095
1096         title = self._html_search_meta('DC.title', webpage, fatal=True)
1097
1098         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1099         camtasia_cfg = self._download_xml(
1100             camtasia_url, video_id,
1101             note='Downloading camtasia configuration',
1102             errnote='Failed to download camtasia configuration')
1103         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1104
1105         entries = []
1106         for n in fileset_node.getchildren():
1107             url_n = n.find('./uri')
1108             if url_n is None:
1109                 continue
1110
1111             entries.append({
1112                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1113                 'title': '%s - %s' % (title, n.tag),
1114                 'url': compat_urlparse.urljoin(url, url_n.text),
1115                 'duration': float_or_none(n.find('./duration').text),
1116             })
1117
1118         return {
1119             '_type': 'playlist',
1120             'entries': entries,
1121             'title': title,
1122         }
1123
1124     def _real_extract(self, url):
1125         if url.startswith('//'):
1126             return {
1127                 '_type': 'url',
1128                 'url': self.http_scheme() + url,
1129             }
1130
1131         parsed_url = compat_urlparse.urlparse(url)
1132         if not parsed_url.scheme:
1133             default_search = self._downloader.params.get('default_search')
1134             if default_search is None:
1135                 default_search = 'fixup_error'
1136
1137             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1138                 if '/' in url:
1139                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1140                     return self.url_result('http://' + url)
1141                 elif default_search != 'fixup_error':
1142                     if default_search == 'auto_warning':
1143                         if re.match(r'^(?:url|URL)$', url):
1144                             raise ExtractorError(
1145                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1146                                 expected=True)
1147                         else:
1148                             self._downloader.report_warning(
1149                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1150                     return self.url_result('ytsearch:' + url)
1151
1152             if default_search in ('error', 'fixup_error'):
1153                 raise ExtractorError(
1154                     '%r is not a valid URL. '
1155                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1156                     % (url, url), expected=True)
1157             else:
1158                 if ':' not in default_search:
1159                     default_search += ':'
1160                 return self.url_result(default_search + url)
1161
1162         url, smuggled_data = unsmuggle_url(url)
1163         force_videoid = None
1164         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1165         if smuggled_data and 'force_videoid' in smuggled_data:
1166             force_videoid = smuggled_data['force_videoid']
1167             video_id = force_videoid
1168         else:
1169             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1170
1171         self.to_screen('%s: Requesting header' % video_id)
1172
1173         head_req = HEADRequest(url)
1174         head_response = self._request_webpage(
1175             head_req, video_id,
1176             note=False, errnote='Could not send HEAD request to %s' % url,
1177             fatal=False)
1178
1179         if head_response is not False:
1180             # Check for redirect
1181             new_url = head_response.geturl()
1182             if url != new_url:
1183                 self.report_following_redirect(new_url)
1184                 if force_videoid:
1185                     new_url = smuggle_url(
1186                         new_url, {'force_videoid': force_videoid})
1187                 return self.url_result(new_url)
1188
1189         full_response = None
1190         if head_response is False:
1191             request = compat_urllib_request.Request(url)
1192             request.add_header('Accept-Encoding', '*')
1193             full_response = self._request_webpage(request, video_id)
1194             head_response = full_response
1195
1196         # Check for direct link to a video
1197         content_type = head_response.headers.get('Content-Type', '')
1198         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1199         if m:
1200             upload_date = unified_strdate(
1201                 head_response.headers.get('Last-Modified'))
1202             return {
1203                 'id': video_id,
1204                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1205                 'direct': True,
1206                 'formats': [{
1207                     'format_id': m.group('format_id'),
1208                     'url': url,
1209                     'vcodec': 'none' if m.group('type') == 'audio' else None
1210                 }],
1211                 'upload_date': upload_date,
1212             }
1213
1214         if not self._downloader.params.get('test', False) and not is_intentional:
1215             force = self._downloader.params.get('force_generic_extractor', False)
1216             self._downloader.report_warning(
1217                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1218
1219         if not full_response:
1220             request = compat_urllib_request.Request(url)
1221             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1222             # making it impossible to download only chunk of the file (yet we need only 512kB to
1223             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1224             # that will always result in downloading the whole file that is not desirable.
1225             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1226             # to accept raw bytes and being able to download only a chunk.
1227             # It may probably better to solve this by checking Content-Type for application/octet-stream
1228             # after HEAD request finishes, but not sure if we can rely on this.
1229             request.add_header('Accept-Encoding', '*')
1230             full_response = self._request_webpage(request, video_id)
1231
1232         # Maybe it's a direct link to a video?
1233         # Be careful not to download the whole thing!
1234         first_bytes = full_response.read(512)
1235         if not is_html(first_bytes):
1236             self._downloader.report_warning(
1237                 'URL could be a direct video link, returning it as such.')
1238             upload_date = unified_strdate(
1239                 head_response.headers.get('Last-Modified'))
1240             return {
1241                 'id': video_id,
1242                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1243                 'direct': True,
1244                 'url': url,
1245                 'upload_date': upload_date,
1246             }
1247
1248         webpage = self._webpage_read_content(
1249             full_response, url, video_id, prefix=first_bytes)
1250
1251         self.report_extraction(video_id)
1252
1253         # Is it an RSS feed, a SMIL file or a XSPF playlist?
1254         try:
1255             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1256             if doc.tag == 'rss':
1257                 return self._extract_rss(url, video_id, doc)
1258             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1259                 return self._parse_smil(doc, url, video_id)
1260             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1261                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1262         except compat_xml_parse_error:
1263             pass
1264
1265         # Is it a Camtasia project?
1266         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1267         if camtasia_res is not None:
1268             return camtasia_res
1269
1270         # Sometimes embedded video player is hidden behind percent encoding
1271         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1272         # Unescaping the whole page allows to handle those cases in a generic way
1273         webpage = compat_urllib_parse_unquote(webpage)
1274
1275         # it's tempting to parse this further, but you would
1276         # have to take into account all the variations like
1277         #   Video Title - Site Name
1278         #   Site Name | Video Title
1279         #   Video Title - Tagline | Site Name
1280         # and so on and so forth; it's just not practical
1281         video_title = self._html_search_regex(
1282             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1283             default='video')
1284
1285         # Try to detect age limit automatically
1286         age_limit = self._rta_search(webpage)
1287         # And then there are the jokers who advertise that they use RTA,
1288         # but actually don't.
1289         AGE_LIMIT_MARKERS = [
1290             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1291         ]
1292         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1293             age_limit = 18
1294
1295         # video uploader is domain name
1296         video_uploader = self._search_regex(
1297             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1298
1299         # Helper method
1300         def _playlist_from_matches(matches, getter=None, ie=None):
1301             urlrs = orderedSet(
1302                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1303                 for m in matches)
1304             return self.playlist_result(
1305                 urlrs, playlist_id=video_id, playlist_title=video_title)
1306
1307         # Look for BrightCove:
1308         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1309         if bc_urls:
1310             self.to_screen('Brightcove video detected.')
1311             entries = [{
1312                 '_type': 'url',
1313                 'url': smuggle_url(bc_url, {'Referer': url}),
1314                 'ie_key': 'Brightcove'
1315             } for bc_url in bc_urls]
1316
1317             return {
1318                 '_type': 'playlist',
1319                 'title': video_title,
1320                 'id': video_id,
1321                 'entries': entries,
1322             }
1323
1324         # Look for Brightcove In Page Embed:
1325         brightcove_in_page_embed_url = BrightcoveInPageEmbedIE._extract_url(webpage)
1326         if brightcove_in_page_embed_url:
1327             return self.url_result(brightcove_in_page_embed_url, 'BrightcoveInPageEmbed')
1328
1329         # Look for embedded rtl.nl player
1330         matches = re.findall(
1331             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1332             webpage)
1333         if matches:
1334             return _playlist_from_matches(matches, ie='RtlNl')
1335
1336         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1337         if vimeo_url is not None:
1338             return self.url_result(vimeo_url)
1339
1340         vid_me_embed_url = self._search_regex(
1341             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1342             webpage, 'vid.me embed', default=None)
1343         if vid_me_embed_url is not None:
1344             return self.url_result(vid_me_embed_url, 'Vidme')
1345
1346         # Look for embedded YouTube player
1347         matches = re.findall(r'''(?x)
1348             (?:
1349                 <iframe[^>]+?src=|
1350                 data-video-url=|
1351                 <embed[^>]+?src=|
1352                 embedSWF\(?:\s*|
1353                 new\s+SWFObject\(
1354             )
1355             (["\'])
1356                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1357                 (?:embed|v|p)/.+?)
1358             \1''', webpage)
1359         if matches:
1360             return _playlist_from_matches(
1361                 matches, lambda m: unescapeHTML(m[1]))
1362
1363         # Look for lazyYT YouTube embed
1364         matches = re.findall(
1365             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1366         if matches:
1367             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1368
1369         # Look for embedded Dailymotion player
1370         matches = re.findall(
1371             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1372         if matches:
1373             return _playlist_from_matches(
1374                 matches, lambda m: unescapeHTML(m[1]))
1375
1376         # Look for embedded Dailymotion playlist player (#3822)
1377         m = re.search(
1378             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1379         if m:
1380             playlists = re.findall(
1381                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1382             if playlists:
1383                 return _playlist_from_matches(
1384                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1385
1386         # Look for embedded Wistia player
1387         match = re.search(
1388             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1389         if match:
1390             embed_url = self._proto_relative_url(
1391                 unescapeHTML(match.group('url')))
1392             return {
1393                 '_type': 'url_transparent',
1394                 'url': embed_url,
1395                 'ie_key': 'Wistia',
1396                 'uploader': video_uploader,
1397                 'title': video_title,
1398                 'id': video_id,
1399             }
1400
1401         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1402         if match:
1403             return {
1404                 '_type': 'url_transparent',
1405                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1406                 'ie_key': 'Wistia',
1407                 'uploader': video_uploader,
1408                 'title': video_title,
1409                 'id': match.group('id')
1410             }
1411
1412         # Look for embedded blip.tv player
1413         bliptv_url = BlipTVIE._extract_url(webpage)
1414         if bliptv_url:
1415             return self.url_result(bliptv_url, 'BlipTV')
1416
1417         # Look for SVT player
1418         svt_url = SVTIE._extract_url(webpage)
1419         if svt_url:
1420             return self.url_result(svt_url, 'SVT')
1421
1422         # Look for embedded condenast player
1423         matches = re.findall(
1424             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1425             webpage)
1426         if matches:
1427             return {
1428                 '_type': 'playlist',
1429                 'entries': [{
1430                     '_type': 'url',
1431                     'ie_key': 'CondeNast',
1432                     'url': ma,
1433                 } for ma in matches],
1434                 'title': video_title,
1435                 'id': video_id,
1436             }
1437
1438         # Look for Bandcamp pages with custom domain
1439         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1440         if mobj is not None:
1441             burl = unescapeHTML(mobj.group(1))
1442             # Don't set the extractor because it can be a track url or an album
1443             return self.url_result(burl)
1444
1445         # Look for embedded Vevo player
1446         mobj = re.search(
1447             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1448         if mobj is not None:
1449             return self.url_result(mobj.group('url'))
1450
1451         # Look for embedded Viddler player
1452         mobj = re.search(
1453             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1454             webpage)
1455         if mobj is not None:
1456             return self.url_result(mobj.group('url'))
1457
1458         # Look for NYTimes player
1459         mobj = re.search(
1460             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1461             webpage)
1462         if mobj is not None:
1463             return self.url_result(mobj.group('url'))
1464
1465         # Look for Libsyn player
1466         mobj = re.search(
1467             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1468         if mobj is not None:
1469             return self.url_result(mobj.group('url'))
1470
1471         # Look for Ooyala videos
1472         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1473                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1474                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1475                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1476         if mobj is not None:
1477             return OoyalaIE._build_url_result(mobj.group('ec'))
1478
1479         # Look for multiple Ooyala embeds on SBN network websites
1480         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1481         if mobj is not None:
1482             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1483             if embeds:
1484                 return _playlist_from_matches(
1485                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1486
1487         # Look for Aparat videos
1488         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1489         if mobj is not None:
1490             return self.url_result(mobj.group(1), 'Aparat')
1491
1492         # Look for MPORA videos
1493         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1494         if mobj is not None:
1495             return self.url_result(mobj.group(1), 'Mpora')
1496
1497         # Look for embedded NovaMov-based player
1498         mobj = re.search(
1499             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1500                     (?P<url>http://(?:(?:embed|www)\.)?
1501                         (?:novamov\.com|
1502                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1503                            videoweed\.(?:es|com)|
1504                            movshare\.(?:net|sx|ag)|
1505                            divxstage\.(?:eu|net|ch|co|at|ag))
1506                         /embed\.php.+?)\1''', webpage)
1507         if mobj is not None:
1508             return self.url_result(mobj.group('url'))
1509
1510         # Look for embedded Facebook player
1511         mobj = re.search(
1512             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1513         if mobj is not None:
1514             return self.url_result(mobj.group('url'), 'Facebook')
1515
1516         # Look for embedded VK player
1517         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1518         if mobj is not None:
1519             return self.url_result(mobj.group('url'), 'VK')
1520
1521         # Look for embedded ivi player
1522         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1523         if mobj is not None:
1524             return self.url_result(mobj.group('url'), 'Ivi')
1525
1526         # Look for embedded Huffington Post player
1527         mobj = re.search(
1528             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1529         if mobj is not None:
1530             return self.url_result(mobj.group('url'), 'HuffPost')
1531
1532         # Look for embed.ly
1533         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1534         if mobj is not None:
1535             return self.url_result(mobj.group('url'))
1536         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1537         if mobj is not None:
1538             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1539
1540         # Look for funnyordie embed
1541         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1542         if matches:
1543             return _playlist_from_matches(
1544                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1545
1546         # Look for BBC iPlayer embed
1547         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1548         if matches:
1549             return _playlist_from_matches(matches, ie='BBCCoUk')
1550
1551         # Look for embedded RUTV player
1552         rutv_url = RUTVIE._extract_url(webpage)
1553         if rutv_url:
1554             return self.url_result(rutv_url, 'RUTV')
1555
1556         # Look for embedded TVC player
1557         tvc_url = TVCIE._extract_url(webpage)
1558         if tvc_url:
1559             return self.url_result(tvc_url, 'TVC')
1560
1561         # Look for embedded SportBox player
1562         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1563         if sportbox_urls:
1564             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1565
1566         # Look for embedded PornHub player
1567         pornhub_url = PornHubIE._extract_url(webpage)
1568         if pornhub_url:
1569             return self.url_result(pornhub_url, 'PornHub')
1570
1571         # Look for embedded XHamster player
1572         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1573         if xhamster_urls:
1574             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1575
1576         # Look for embedded Tvigle player
1577         mobj = re.search(
1578             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1579         if mobj is not None:
1580             return self.url_result(mobj.group('url'), 'Tvigle')
1581
1582         # Look for embedded TED player
1583         mobj = re.search(
1584             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1585         if mobj is not None:
1586             return self.url_result(mobj.group('url'), 'TED')
1587
1588         # Look for embedded Ustream videos
1589         mobj = re.search(
1590             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1591         if mobj is not None:
1592             return self.url_result(mobj.group('url'), 'Ustream')
1593
1594         # Look for embedded arte.tv player
1595         mobj = re.search(
1596             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1597             webpage)
1598         if mobj is not None:
1599             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1600
1601         # Look for embedded francetv player
1602         mobj = re.search(
1603             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1604             webpage)
1605         if mobj is not None:
1606             return self.url_result(mobj.group('url'))
1607
1608         # Look for embedded smotri.com player
1609         smotri_url = SmotriIE._extract_url(webpage)
1610         if smotri_url:
1611             return self.url_result(smotri_url, 'Smotri')
1612
1613         # Look for embedded Myvi.ru player
1614         myvi_url = MyviIE._extract_url(webpage)
1615         if myvi_url:
1616             return self.url_result(myvi_url)
1617
1618         # Look for embedded soundcloud player
1619         mobj = re.search(
1620             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1621             webpage)
1622         if mobj is not None:
1623             url = unescapeHTML(mobj.group('url'))
1624             return self.url_result(url)
1625
1626         # Look for embedded vulture.com player
1627         mobj = re.search(
1628             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1629             webpage)
1630         if mobj is not None:
1631             url = unescapeHTML(mobj.group('url'))
1632             return self.url_result(url, ie='Vulture')
1633
1634         # Look for embedded mtvservices player
1635         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1636         if mtvservices_url:
1637             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1638
1639         # Look for embedded yahoo player
1640         mobj = re.search(
1641             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1642             webpage)
1643         if mobj is not None:
1644             return self.url_result(mobj.group('url'), 'Yahoo')
1645
1646         # Look for embedded sbs.com.au player
1647         mobj = re.search(
1648             r'''(?x)
1649             (?:
1650                 <meta\s+property="og:video"\s+content=|
1651                 <iframe[^>]+?src=
1652             )
1653             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1654             webpage)
1655         if mobj is not None:
1656             return self.url_result(mobj.group('url'), 'SBS')
1657
1658         # Look for embedded Cinchcast player
1659         mobj = re.search(
1660             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1661             webpage)
1662         if mobj is not None:
1663             return self.url_result(mobj.group('url'), 'Cinchcast')
1664
1665         mobj = re.search(
1666             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1667             webpage)
1668         if not mobj:
1669             mobj = re.search(
1670                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1671                 webpage)
1672         if mobj is not None:
1673             return self.url_result(mobj.group('url'), 'MLB')
1674
1675         mobj = re.search(
1676             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1677             webpage)
1678         if mobj is not None:
1679             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1680
1681         mobj = re.search(
1682             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1683             webpage)
1684         if mobj is not None:
1685             return self.url_result(mobj.group('url'), 'Livestream')
1686
1687         # Look for Zapiks embed
1688         mobj = re.search(
1689             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1690         if mobj is not None:
1691             return self.url_result(mobj.group('url'), 'Zapiks')
1692
1693         # Look for Kaltura embeds
1694         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
1695                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1696         if mobj is not None:
1697             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1698
1699         # Look for Eagle.Platform embeds
1700         mobj = re.search(
1701             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1702         if mobj is not None:
1703             return self.url_result(mobj.group('url'), 'EaglePlatform')
1704
1705         # Look for ClipYou (uses Eagle.Platform) embeds
1706         mobj = re.search(
1707             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1708         if mobj is not None:
1709             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1710
1711         # Look for Pladform embeds
1712         mobj = re.search(
1713             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1714         if mobj is not None:
1715             return self.url_result(mobj.group('url'), 'Pladform')
1716
1717         # Look for Playwire embeds
1718         mobj = re.search(
1719             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1720         if mobj is not None:
1721             return self.url_result(mobj.group('url'))
1722
1723         # Look for 5min embeds
1724         mobj = re.search(
1725             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1726         if mobj is not None:
1727             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1728
1729         # Look for Crooks and Liars embeds
1730         mobj = re.search(
1731             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1732         if mobj is not None:
1733             return self.url_result(mobj.group('url'))
1734
1735         # Look for NBC Sports VPlayer embeds
1736         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1737         if nbc_sports_url:
1738             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1739
1740         # Look for UDN embeds
1741         mobj = re.search(
1742             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1743         if mobj is not None:
1744             return self.url_result(
1745                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1746
1747         # Look for Senate ISVP iframe
1748         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1749         if senate_isvp_url:
1750             return self.url_result(senate_isvp_url, 'SenateISVP')
1751
1752         # Look for Dailymotion Cloud videos
1753         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1754         if dmcloud_url:
1755             return self.url_result(dmcloud_url, 'DailymotionCloud')
1756
1757         # Look for OnionStudios embeds
1758         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1759         if onionstudios_url:
1760             return self.url_result(onionstudios_url)
1761
1762         # Look for SnagFilms embeds
1763         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1764         if snagfilms_url:
1765             return self.url_result(snagfilms_url)
1766
1767         # Look for ScreenwaveMedia embeds
1768         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1769         if mobj is not None:
1770             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1771
1772         # Look for AdobeTVVideo embeds
1773         mobj = re.search(
1774             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1775             webpage)
1776         if mobj is not None:
1777             return self.url_result(
1778                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1779                 'AdobeTVVideo')
1780
1781         def check_video(vurl):
1782             if YoutubeIE.suitable(vurl):
1783                 return True
1784             vpath = compat_urlparse.urlparse(vurl).path
1785             vext = determine_ext(vpath)
1786             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1787
1788         def filter_video(urls):
1789             return list(filter(check_video, urls))
1790
1791         # Start with something easy: JW Player in SWFObject
1792         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1793         if not found:
1794             # Look for gorilla-vid style embedding
1795             found = filter_video(re.findall(r'''(?sx)
1796                 (?:
1797                     jw_plugins|
1798                     JWPlayerOptions|
1799                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1800                 )
1801                 .*?
1802                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1803         if not found:
1804             # Broaden the search a little bit
1805             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1806         if not found:
1807             # Broaden the findall a little bit: JWPlayer JS loader
1808             found = filter_video(re.findall(
1809                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1810         if not found:
1811             # Flow player
1812             found = filter_video(re.findall(r'''(?xs)
1813                 flowplayer\("[^"]+",\s*
1814                     \{[^}]+?\}\s*,
1815                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1816                         ["']?url["']?\s*:\s*["']([^"']+)["']
1817             ''', webpage))
1818         if not found:
1819             # Cinerama player
1820             found = re.findall(
1821                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1822         if not found:
1823             # Try to find twitter cards info
1824             found = filter_video(re.findall(
1825                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1826         if not found:
1827             # We look for Open Graph info:
1828             # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1829             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1830             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1831             if m_video_type is not None:
1832                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1833         if not found:
1834             # HTML5 video
1835             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1836         if not found:
1837             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1838             found = re.search(
1839                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1840                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1841                 webpage)
1842             if not found:
1843                 # Look also in Refresh HTTP header
1844                 refresh_header = head_response.headers.get('Refresh')
1845                 if refresh_header:
1846                     # In python 2 response HTTP headers are bytestrings
1847                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1848                         refresh_header = refresh_header.decode('iso-8859-1')
1849                     found = re.search(REDIRECT_REGEX, refresh_header)
1850             if found:
1851                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1852                 self.report_following_redirect(new_url)
1853                 return {
1854                     '_type': 'url',
1855                     'url': new_url,
1856                 }
1857         if not found:
1858             raise UnsupportedError(url)
1859
1860         entries = []
1861         for video_url in found:
1862             video_url = compat_urlparse.urljoin(url, video_url)
1863             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1864
1865             # Sometimes, jwplayer extraction will result in a YouTube URL
1866             if YoutubeIE.suitable(video_url):
1867                 entries.append(self.url_result(video_url, 'Youtube'))
1868                 continue
1869
1870             # here's a fun little line of code for you:
1871             video_id = os.path.splitext(video_id)[0]
1872
1873             ext = determine_ext(video_url)
1874             if ext == 'smil':
1875                 entries.append({
1876                     'id': video_id,
1877                     'formats': self._extract_smil_formats(video_url, video_id),
1878                     'uploader': video_uploader,
1879                     'title': video_title,
1880                     'age_limit': age_limit,
1881                 })
1882             elif ext == 'xspf':
1883                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
1884             else:
1885                 entries.append({
1886                     'id': video_id,
1887                     'url': video_url,
1888                     'uploader': video_uploader,
1889                     'title': video_title,
1890                     'age_limit': age_limit,
1891                 })
1892
1893         if len(entries) == 1:
1894             return entries[0]
1895         else:
1896             for num, e in enumerate(entries, start=1):
1897                 # 'url' results don't have a title
1898                 if e.get('title') is not None:
1899                     e['title'] = '%s (%d)' % (e['title'], num)
1900             return {
1901                 '_type': 'playlist',
1902                 'entries': entries,
1903             }