[extractor/generic] Add support for condenast script embeds (Closes #6885, closes...
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_urllib_parse_unquote,
13     compat_urllib_request,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     parse_xml,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import BrightcoveIE
34 from .nbc import NBCSportsVPlayerIE
35 from .ooyala import OoyalaIE
36 from .rutv import RUTVIE
37 from .tvc import TVCIE
38 from .sportbox import SportBoxEmbedIE
39 from .smotri import SmotriIE
40 from .myvi import MyviIE
41 from .condenast import CondeNastIE
42 from .udn import UDNEmbedIE
43 from .senateisvp import SenateISVPIE
44 from .bliptv import BlipTVIE
45 from .svt import SVTIE
46 from .pornhub import PornHubIE
47 from .xhamster import XHamsterEmbedIE
48 from .vimeo import VimeoIE
49 from .dailymotion import DailymotionCloudIE
50 from .onionstudios import OnionStudiosIE
51 from .snagfilms import SnagFilmsEmbedIE
52 from .screenwavemedia import ScreenwaveMediaIE
53 from .mtv import MTVServicesEmbeddedIE
54
55
56 class GenericIE(InfoExtractor):
57     IE_DESC = 'Generic downloader that works on some sites'
58     _VALID_URL = r'.*'
59     IE_NAME = 'generic'
60     _TESTS = [
61         # Direct link to a video
62         {
63             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
64             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
65             'info_dict': {
66                 'id': 'trailer',
67                 'ext': 'mp4',
68                 'title': 'trailer',
69                 'upload_date': '20100513',
70             }
71         },
72         # Direct link to media delivered compressed (until Accept-Encoding is *)
73         {
74             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
75             'md5': '128c42e68b13950268b648275386fc74',
76             'info_dict': {
77                 'id': 'FictionJunction-Parallel_Hearts',
78                 'ext': 'flac',
79                 'title': 'FictionJunction-Parallel_Hearts',
80                 'upload_date': '20140522',
81             },
82             'expected_warnings': [
83                 'URL could be a direct video link, returning it as such.'
84             ]
85         },
86         # Direct download with broken HEAD
87         {
88             'url': 'http://ai-radio.org:8000/radio.opus',
89             'info_dict': {
90                 'id': 'radio',
91                 'ext': 'opus',
92                 'title': 'radio',
93             },
94             'params': {
95                 'skip_download': True,  # infinite live stream
96             },
97             'expected_warnings': [
98                 r'501.*Not Implemented'
99             ],
100         },
101         # Direct link with incorrect MIME type
102         {
103             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
104             'md5': '4ccbebe5f36706d85221f204d7eb5913',
105             'info_dict': {
106                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
107                 'id': '5_Lennart_Poettering_-_Systemd',
108                 'ext': 'webm',
109                 'title': '5_Lennart_Poettering_-_Systemd',
110                 'upload_date': '20141120',
111             },
112             'expected_warnings': [
113                 'URL could be a direct video link, returning it as such.'
114             ]
115         },
116         # RSS feed
117         {
118             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
119             'info_dict': {
120                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
121                 'title': 'Zero Punctuation',
122                 'description': 're:.*groundbreaking video review series.*'
123             },
124             'playlist_mincount': 11,
125         },
126         # RSS feed with enclosure
127         {
128             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
129             'info_dict': {
130                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
131                 'ext': 'm4v',
132                 'upload_date': '20150228',
133                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
134             }
135         },
136         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
137         {
138             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
139             'info_dict': {
140                 'id': 'smil',
141                 'ext': 'mp4',
142                 'title': 'Automatics, robotics and biocybernetics',
143                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
144                 'formats': 'mincount:16',
145                 'subtitles': 'mincount:1',
146             },
147             'params': {
148                 'force_generic_extractor': True,
149                 'skip_download': True,
150             },
151         },
152         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
153         {
154             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
155             'info_dict': {
156                 'id': 'hds',
157                 'ext': 'flv',
158                 'title': 'hds',
159                 'formats': 'mincount:1',
160             },
161             'params': {
162                 'skip_download': True,
163             },
164         },
165         # SMIL from https://www.restudy.dk/video/play/id/1637
166         {
167             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
168             'info_dict': {
169                 'id': 'video_1637',
170                 'ext': 'flv',
171                 'title': 'video_1637',
172                 'formats': 'mincount:3',
173             },
174             'params': {
175                 'skip_download': True,
176             },
177         },
178         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
179         {
180             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
181             'info_dict': {
182                 'id': 'smil-service',
183                 'ext': 'flv',
184                 'title': 'smil-service',
185                 'formats': 'mincount:1',
186             },
187             'params': {
188                 'skip_download': True,
189             },
190         },
191         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
192         {
193             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
194             'info_dict': {
195                 'id': '4719370',
196                 'ext': 'mp4',
197                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
198                 'formats': 'mincount:3',
199             },
200             'params': {
201                 'skip_download': True,
202             },
203         },
204         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
205         {
206             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
207             'info_dict': {
208                 'id': 'mZlp2ctYIUEB',
209                 'ext': 'mp4',
210                 'title': 'Tikibad ontruimd wegens brand',
211                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
212                 'thumbnail': 're:^https?://.*\.jpg$',
213                 'duration': 33,
214             },
215             'params': {
216                 'skip_download': True,
217             },
218         },
219         # google redirect
220         {
221             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
222             'info_dict': {
223                 'id': 'cmQHVoWB5FY',
224                 'ext': 'mp4',
225                 'upload_date': '20130224',
226                 'uploader_id': 'TheVerge',
227                 'description': 're:^Chris Ziegler takes a look at the\.*',
228                 'uploader': 'The Verge',
229                 'title': 'First Firefox OS phones side-by-side',
230             },
231             'params': {
232                 'skip_download': False,
233             }
234         },
235         {
236             # redirect in Refresh HTTP header
237             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
238             'info_dict': {
239                 'id': 'pO8h3EaFRdo',
240                 'ext': 'mp4',
241                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
242                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
243                 'upload_date': '20150917',
244                 'uploader_id': 'brtvofficial',
245                 'uploader': 'Boiler Room',
246             },
247             'params': {
248                 'skip_download': False,
249             },
250         },
251         {
252             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
253             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
254             'info_dict': {
255                 'id': '13601338388002',
256                 'ext': 'mp4',
257                 'uploader': 'www.hodiho.fr',
258                 'title': 'R\u00e9gis plante sa Jeep',
259             }
260         },
261         # bandcamp page with custom domain
262         {
263             'add_ie': ['Bandcamp'],
264             'url': 'http://bronyrock.com/track/the-pony-mash',
265             'info_dict': {
266                 'id': '3235767654',
267                 'ext': 'mp3',
268                 'title': 'The Pony Mash',
269                 'uploader': 'M_Pallante',
270             },
271             'skip': 'There is a limit of 200 free downloads / month for the test song',
272         },
273         # embedded brightcove video
274         # it also tests brightcove videos that need to set the 'Referer' in the
275         # http requests
276         {
277             'add_ie': ['Brightcove'],
278             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
279             'info_dict': {
280                 'id': '2765128793001',
281                 'ext': 'mp4',
282                 'title': 'Le cours de bourse : l’analyse technique',
283                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
284                 'uploader': 'BFM BUSINESS',
285             },
286             'params': {
287                 'skip_download': True,
288             },
289         },
290         {
291             # https://github.com/rg3/youtube-dl/issues/2253
292             'url': 'http://bcove.me/i6nfkrc3',
293             'md5': '0ba9446db037002366bab3b3eb30c88c',
294             'info_dict': {
295                 'id': '3101154703001',
296                 'ext': 'mp4',
297                 'title': 'Still no power',
298                 'uploader': 'thestar.com',
299                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
300             },
301             'add_ie': ['Brightcove'],
302         },
303         {
304             'url': 'http://www.championat.com/video/football/v/87/87499.html',
305             'md5': 'fb973ecf6e4a78a67453647444222983',
306             'info_dict': {
307                 'id': '3414141473001',
308                 'ext': 'mp4',
309                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
310                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
311                 'uploader': 'Championat',
312             },
313         },
314         {
315             # https://github.com/rg3/youtube-dl/issues/3541
316             'add_ie': ['Brightcove'],
317             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
318             'info_dict': {
319                 'id': '3866516442001',
320                 'ext': 'mp4',
321                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
322                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
323                 'uploader': 'SBS Broadcasting',
324             },
325             'skip': 'Restricted to Netherlands',
326             'params': {
327                 'skip_download': True,  # m3u8 download
328             },
329         },
330         # ooyala video
331         {
332             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
333             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
334             'info_dict': {
335                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
336                 'ext': 'mp4',
337                 'title': '2cc213299525360.mov',  # that's what we get
338             },
339             'add_ie': ['Ooyala'],
340         },
341         {
342             # ooyala video embedded with http://player.ooyala.com/iframe.js
343             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
344             'info_dict': {
345                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
346                 'ext': 'mp4',
347                 'title': '"Steve Jobs: Man in the Machine" trailer',
348                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
349             },
350             'params': {
351                 'skip_download': True,
352             },
353         },
354         # multiple ooyala embeds on SBN network websites
355         {
356             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
357             'info_dict': {
358                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
359                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
360             },
361             'playlist_mincount': 3,
362             'params': {
363                 'skip_download': True,
364             },
365             'add_ie': ['Ooyala'],
366         },
367         # embed.ly video
368         {
369             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
370             'info_dict': {
371                 'id': '9ODmcdjQcHQ',
372                 'ext': 'mp4',
373                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
374                 'upload_date': '20140225',
375                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
376                 'uploader': 'Tested',
377                 'uploader_id': 'testedcom',
378             },
379             # No need to test YoutubeIE here
380             'params': {
381                 'skip_download': True,
382             },
383         },
384         # funnyordie embed
385         {
386             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
387             'info_dict': {
388                 'id': '18e820ec3f',
389                 'ext': 'mp4',
390                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
391                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
392             },
393         },
394         # RUTV embed
395         {
396             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
397             'info_dict': {
398                 'id': '776940',
399                 'ext': 'mp4',
400                 'title': 'Охотское море стало целиком российским',
401                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
402             },
403             'params': {
404                 # m3u8 download
405                 'skip_download': True,
406             },
407         },
408         # TVC embed
409         {
410             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
411             'info_dict': {
412                 'id': '55304',
413                 'ext': 'mp4',
414                 'title': 'Дошкольное воспитание',
415             },
416         },
417         # SportBox embed
418         {
419             'url': 'http://www.vestifinance.ru/articles/25753',
420             'info_dict': {
421                 'id': '25753',
422                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
423             },
424             'playlist': [{
425                 'info_dict': {
426                     'id': '370908',
427                     'title': 'Госзаказ. День 3',
428                     'ext': 'mp4',
429                 }
430             }, {
431                 'info_dict': {
432                     'id': '370905',
433                     'title': 'Госзаказ. День 2',
434                     'ext': 'mp4',
435                 }
436             }, {
437                 'info_dict': {
438                     'id': '370902',
439                     'title': 'Госзаказ. День 1',
440                     'ext': 'mp4',
441                 }
442             }],
443             'params': {
444                 # m3u8 download
445                 'skip_download': True,
446             },
447         },
448         # Myvi.ru embed
449         {
450             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
451             'info_dict': {
452                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
453                 'ext': 'mp4',
454                 'title': 'Ужастики, русский трейлер (2015)',
455                 'thumbnail': 're:^https?://.*\.jpg$',
456                 'duration': 153,
457             }
458         },
459         # XHamster embed
460         {
461             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
462             'info_dict': {
463                 'id': 'showthread',
464                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
465             },
466             'playlist_mincount': 7,
467         },
468         # Embedded TED video
469         {
470             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
471             'md5': '65fdff94098e4a607385a60c5177c638',
472             'info_dict': {
473                 'id': '1969',
474                 'ext': 'mp4',
475                 'title': 'Hidden miracles of the natural world',
476                 'uploader': 'Louie Schwartzberg',
477                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
478             }
479         },
480         # Embedded Ustream video
481         {
482             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
483             'md5': '27b99cdb639c9b12a79bca876a073417',
484             'info_dict': {
485                 'id': '45734260',
486                 'ext': 'flv',
487                 'uploader': 'AU SPA:  The NSA and Privacy',
488                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
489             }
490         },
491         # nowvideo embed hidden behind percent encoding
492         {
493             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
494             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
495             'info_dict': {
496                 'id': '06e53103ca9aa',
497                 'ext': 'flv',
498                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
499                 'description': 'No description',
500             },
501         },
502         # arte embed
503         {
504             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
505             'md5': '7653032cbb25bf6c80d80f217055fa43',
506             'info_dict': {
507                 'id': '048195-004_PLUS7-F',
508                 'ext': 'flv',
509                 'title': 'X:enius',
510                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
511                 'upload_date': '20140320',
512             },
513             'params': {
514                 'skip_download': 'Requires rtmpdump'
515             }
516         },
517         # francetv embed
518         {
519             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
520             'info_dict': {
521                 'id': 'EV_30231',
522                 'ext': 'mp4',
523                 'title': 'Alcaline, le concert avec Calogero',
524                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
525                 'upload_date': '20150226',
526                 'timestamp': 1424989860,
527                 'duration': 5400,
528             },
529             'params': {
530                 # m3u8 downloads
531                 'skip_download': True,
532             },
533             'expected_warnings': [
534                 'Forbidden'
535             ]
536         },
537         # Condé Nast embed
538         {
539             'url': 'http://www.wired.com/2014/04/honda-asimo/',
540             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
541             'info_dict': {
542                 'id': '53501be369702d3275860000',
543                 'ext': 'mp4',
544                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
545             }
546         },
547         # Dailymotion embed
548         {
549             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
550             'md5': '441aeeb82eb72c422c7f14ec533999cd',
551             'info_dict': {
552                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
553                 'ext': 'mp4',
554                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
555                 'uploader': 'Spi0n',
556             },
557             'add_ie': ['Dailymotion'],
558         },
559         # YouTube embed
560         {
561             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
562             'info_dict': {
563                 'id': 'FXRb4ykk4S0',
564                 'ext': 'mp4',
565                 'title': 'The NBL Auction 2014',
566                 'uploader': 'BADMINTON England',
567                 'uploader_id': 'BADMINTONEvents',
568                 'upload_date': '20140603',
569                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
570             },
571             'add_ie': ['Youtube'],
572             'params': {
573                 'skip_download': True,
574             }
575         },
576         # MTVServices embed
577         {
578             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
579             'md5': '35727f82f58c76d996fc188f9755b0d5',
580             'info_dict': {
581                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
582                 'ext': 'mp4',
583                 'title': 'Review',
584                 'description': 'Mario\'s life in the fast lane has never looked so good.',
585             },
586         },
587         # YouTube embed via <data-embed-url="">
588         {
589             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
590             'info_dict': {
591                 'id': '4vAffPZIT44',
592                 'ext': 'mp4',
593                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
594                 'uploader': 'Gameloft',
595                 'uploader_id': 'gameloft',
596                 'upload_date': '20140828',
597                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
598             },
599             'params': {
600                 'skip_download': True,
601             }
602         },
603         # Camtasia studio
604         {
605             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
606             'playlist': [{
607                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
608                 'info_dict': {
609                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
610                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
611                     'ext': 'flv',
612                     'duration': 2235.90,
613                 }
614             }, {
615                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
616                 'info_dict': {
617                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
618                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
619                     'ext': 'flv',
620                     'duration': 2235.93,
621                 }
622             }],
623             'info_dict': {
624                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
625             }
626         },
627         # Flowplayer
628         {
629             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
630             'md5': '9d65602bf31c6e20014319c7d07fba27',
631             'info_dict': {
632                 'id': '5123ea6d5e5a7',
633                 'ext': 'mp4',
634                 'age_limit': 18,
635                 'uploader': 'www.handjobhub.com',
636                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
637             }
638         },
639         # Multiple brightcove videos
640         # https://github.com/rg3/youtube-dl/issues/2283
641         {
642             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
643             'info_dict': {
644                 'id': 'always-never',
645                 'title': 'Always / Never - The New Yorker',
646             },
647             'playlist_count': 3,
648             'params': {
649                 'extract_flat': False,
650                 'skip_download': True,
651             }
652         },
653         # MLB embed
654         {
655             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
656             'md5': '96f09a37e44da40dd083e12d9a683327',
657             'info_dict': {
658                 'id': '33322633',
659                 'ext': 'mp4',
660                 'title': 'Ump changes call to ball',
661                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
662                 'duration': 48,
663                 'timestamp': 1401537900,
664                 'upload_date': '20140531',
665                 'thumbnail': 're:^https?://.*\.jpg$',
666             },
667         },
668         # Wistia embed
669         {
670             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
671             'md5': '8788b683c777a5cf25621eaf286d0c23',
672             'info_dict': {
673                 'id': '1cfaf6b7ea',
674                 'ext': 'mov',
675                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
676                 'duration': 643.0,
677                 'filesize': 182808282,
678                 'uploader': 'education-portal.com',
679             },
680         },
681         {
682             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
683             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
684             'info_dict': {
685                 'id': 'uxjb0lwrcz',
686                 'ext': 'mp4',
687                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
688                 'duration': 1715.0,
689                 'uploader': 'thoughtworks.wistia.com',
690             },
691         },
692         # Soundcloud embed
693         {
694             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
695             'info_dict': {
696                 'id': '174391317',
697                 'ext': 'mp3',
698                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
699                 'uploader': 'Sophos Security',
700                 'title': 'Chet Chat 171 - Oct 29, 2014',
701                 'upload_date': '20141029',
702             }
703         },
704         # Livestream embed
705         {
706             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
707             'info_dict': {
708                 'id': '67864563',
709                 'ext': 'flv',
710                 'upload_date': '20141112',
711                 'title': 'Rosetta #CometLanding webcast HL 10',
712             }
713         },
714         # LazyYT
715         {
716             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
717             'info_dict': {
718                 'id': '1986',
719                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
720             },
721             'playlist_mincount': 2,
722         },
723         # Cinchcast embed
724         {
725             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
726             'info_dict': {
727                 'id': '7141703',
728                 'ext': 'mp3',
729                 'upload_date': '20141126',
730                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
731             }
732         },
733         # Cinerama player
734         {
735             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
736             'info_dict': {
737                 'id': '730m_DandD_1901_512k',
738                 'ext': 'mp4',
739                 'uploader': 'www.abc.net.au',
740                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
741             }
742         },
743         # embedded viddler video
744         {
745             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
746             'info_dict': {
747                 'id': '4d03aad9',
748                 'ext': 'mp4',
749                 'uploader': 'deadspin',
750                 'title': 'WALL-TO-GORTAT',
751                 'timestamp': 1422285291,
752                 'upload_date': '20150126',
753             },
754             'add_ie': ['Viddler'],
755         },
756         # Libsyn embed
757         {
758             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
759             'info_dict': {
760                 'id': '3377616',
761                 'ext': 'mp3',
762                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
763                 'description': 'md5:601cb790edd05908957dae8aaa866465',
764                 'upload_date': '20150220',
765             },
766         },
767         # jwplayer YouTube
768         {
769             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
770             'info_dict': {
771                 'id': 'Mrj4DVp2zeA',
772                 'ext': 'mp4',
773                 'upload_date': '20150212',
774                 'uploader': 'The National Archives UK',
775                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
776                 'uploader_id': 'NationalArchives08',
777                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
778             },
779         },
780         # rtl.nl embed
781         {
782             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
783             'playlist_mincount': 5,
784             'info_dict': {
785                 'id': 'aanslagen-kopenhagen',
786                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
787             }
788         },
789         # Zapiks embed
790         {
791             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
792             'info_dict': {
793                 'id': '118046',
794                 'ext': 'mp4',
795                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
796             }
797         },
798         # Kaltura embed
799         {
800             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
801             'info_dict': {
802                 'id': '1_eergr3h1',
803                 'ext': 'mp4',
804                 'upload_date': '20150226',
805                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
806                 'timestamp': int,
807                 'title': 'John Carlson Postgame 2/25/15',
808             },
809         },
810         # Kaltura embed (different embed code)
811         {
812             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
813             'info_dict': {
814                 'id': '1_a52wc67y',
815                 'ext': 'flv',
816                 'upload_date': '20150127',
817                 'uploader_id': 'PremierMedia',
818                 'timestamp': int,
819                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
820             },
821         },
822         # Eagle.Platform embed (generic URL)
823         {
824             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
825             'info_dict': {
826                 'id': '227304',
827                 'ext': 'mp4',
828                 'title': 'Навальный вышел на свободу',
829                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
830                 'thumbnail': 're:^https?://.*\.jpg$',
831                 'duration': 87,
832                 'view_count': int,
833                 'age_limit': 0,
834             },
835         },
836         # ClipYou (Eagle.Platform) embed (custom URL)
837         {
838             'url': 'http://muz-tv.ru/play/7129/',
839             'info_dict': {
840                 'id': '12820',
841                 'ext': 'mp4',
842                 'title': "'O Sole Mio",
843                 'thumbnail': 're:^https?://.*\.jpg$',
844                 'duration': 216,
845                 'view_count': int,
846             },
847         },
848         # Pladform embed
849         {
850             'url': 'http://muz-tv.ru/kinozal/view/7400/',
851             'info_dict': {
852                 'id': '100183293',
853                 'ext': 'mp4',
854                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
855                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
856                 'thumbnail': 're:^https?://.*\.jpg$',
857                 'duration': 694,
858                 'age_limit': 0,
859             },
860         },
861         # Playwire embed
862         {
863             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
864             'info_dict': {
865                 'id': '3519514',
866                 'ext': 'mp4',
867                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
868                 'thumbnail': 're:^https?://.*\.png$',
869                 'duration': 45.115,
870             },
871         },
872         # 5min embed
873         {
874             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
875             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
876             'info_dict': {
877                 'id': '518726732',
878                 'ext': 'mp4',
879                 'title': 'Facebook Creates "On This Day" | Crunch Report',
880             },
881         },
882         # SVT embed
883         {
884             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
885             'info_dict': {
886                 'id': '2900353',
887                 'ext': 'flv',
888                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
889                 'duration': 27,
890                 'age_limit': 0,
891             },
892         },
893         # Crooks and Liars embed
894         {
895             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
896             'info_dict': {
897                 'id': '8RUoRhRi',
898                 'ext': 'mp4',
899                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
900                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
901                 'timestamp': 1428207000,
902                 'upload_date': '20150405',
903                 'uploader': 'Heather',
904             },
905         },
906         # Crooks and Liars external embed
907         {
908             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
909             'info_dict': {
910                 'id': 'MTE3MjUtMzQ2MzA',
911                 'ext': 'mp4',
912                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
913                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
914                 'timestamp': 1265032391,
915                 'upload_date': '20100201',
916                 'uploader': 'Heather',
917             },
918         },
919         # NBC Sports vplayer embed
920         {
921             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
922             'info_dict': {
923                 'id': 'ln7x1qSThw4k',
924                 'ext': 'flv',
925                 'title': "PFT Live: New leader in the 'new-look' defense",
926                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
927             },
928         },
929         # UDN embed
930         {
931             'url': 'http://www.udn.com/news/story/7314/822787',
932             'md5': 'fd2060e988c326991037b9aff9df21a6',
933             'info_dict': {
934                 'id': '300346',
935                 'ext': 'mp4',
936                 'title': '中一中男師變性 全校師生力挺',
937                 'thumbnail': 're:^https?://.*\.jpg$',
938             }
939         },
940         # Ooyala embed
941         {
942             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
943             'info_dict': {
944                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
945                 'ext': 'mp4',
946                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
947                 'title': 'This is what separates the Excel masters from the wannabes',
948             },
949             'params': {
950                 # m3u8 downloads
951                 'skip_download': True,
952             }
953         },
954         # Contains a SMIL manifest
955         {
956             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
957             'info_dict': {
958                 'id': 'file',
959                 'ext': 'flv',
960                 'title': '+ Football: Lottery Champions League Europe',
961                 'uploader': 'www.telewebion.com',
962             },
963             'params': {
964                 # rtmpe downloads
965                 'skip_download': True,
966             }
967         },
968         # Brightcove URL in single quotes
969         {
970             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
971             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
972             'info_dict': {
973                 'id': '4255764656001',
974                 'ext': 'mp4',
975                 'title': 'SN Presents: Russell Martin, World Citizen',
976                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
977                 'uploader': 'Rogers Sportsnet',
978             },
979         },
980         # Dailymotion Cloud video
981         {
982             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
983             'md5': '49444254273501a64675a7e68c502681',
984             'info_dict': {
985                 'id': '5585de919473990de4bee11b',
986                 'ext': 'mp4',
987                 'title': 'Le débat',
988                 'thumbnail': 're:^https?://.*\.jpe?g$',
989             }
990         },
991         # OnionStudios embed
992         {
993             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
994             'info_dict': {
995                 'id': '2855',
996                 'ext': 'mp4',
997                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
998                 'thumbnail': 're:^https?://.*\.jpe?g$',
999                 'uploader': 'ClickHole',
1000                 'uploader_id': 'clickhole',
1001             }
1002         },
1003         # SnagFilms embed
1004         {
1005             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1006             'info_dict': {
1007                 'id': '74849a00-85a9-11e1-9660-123139220831',
1008                 'ext': 'mp4',
1009                 'title': '#whilewewatch',
1010             }
1011         },
1012         # AdobeTVVideo embed
1013         {
1014             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1015             'md5': '43662b577c018ad707a63766462b1e87',
1016             'info_dict': {
1017                 'id': '2456',
1018                 'ext': 'mp4',
1019                 'title': 'New experience with Acrobat DC',
1020                 'description': 'New experience with Acrobat DC',
1021                 'duration': 248.667,
1022             },
1023         },
1024         # ScreenwaveMedia embed
1025         {
1026             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1027             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1028             'info_dict': {
1029                 'id': 'cinemasnob-55d26273809dd',
1030                 'ext': 'mp4',
1031                 'title': 'cinemasnob',
1032             },
1033         }
1034     ]
1035
1036     def report_following_redirect(self, new_url):
1037         """Report information extraction."""
1038         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1039
1040     def _extract_rss(self, url, video_id, doc):
1041         playlist_title = doc.find('./channel/title').text
1042         playlist_desc_el = doc.find('./channel/description')
1043         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1044
1045         entries = []
1046         for it in doc.findall('./channel/item'):
1047             next_url = xpath_text(it, 'link', fatal=False)
1048             if not next_url:
1049                 enclosure_nodes = it.findall('./enclosure')
1050                 for e in enclosure_nodes:
1051                     next_url = e.attrib.get('url')
1052                     if next_url:
1053                         break
1054
1055             if not next_url:
1056                 continue
1057
1058             entries.append({
1059                 '_type': 'url',
1060                 'url': next_url,
1061                 'title': it.find('title').text,
1062             })
1063
1064         return {
1065             '_type': 'playlist',
1066             'id': url,
1067             'title': playlist_title,
1068             'description': playlist_desc,
1069             'entries': entries,
1070         }
1071
1072     def _extract_camtasia(self, url, video_id, webpage):
1073         """ Returns None if no camtasia video can be found. """
1074
1075         camtasia_cfg = self._search_regex(
1076             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1077             webpage, 'camtasia configuration file', default=None)
1078         if camtasia_cfg is None:
1079             return None
1080
1081         title = self._html_search_meta('DC.title', webpage, fatal=True)
1082
1083         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1084         camtasia_cfg = self._download_xml(
1085             camtasia_url, video_id,
1086             note='Downloading camtasia configuration',
1087             errnote='Failed to download camtasia configuration')
1088         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1089
1090         entries = []
1091         for n in fileset_node.getchildren():
1092             url_n = n.find('./uri')
1093             if url_n is None:
1094                 continue
1095
1096             entries.append({
1097                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1098                 'title': '%s - %s' % (title, n.tag),
1099                 'url': compat_urlparse.urljoin(url, url_n.text),
1100                 'duration': float_or_none(n.find('./duration').text),
1101             })
1102
1103         return {
1104             '_type': 'playlist',
1105             'entries': entries,
1106             'title': title,
1107         }
1108
1109     def _real_extract(self, url):
1110         if url.startswith('//'):
1111             return {
1112                 '_type': 'url',
1113                 'url': self.http_scheme() + url,
1114             }
1115
1116         parsed_url = compat_urlparse.urlparse(url)
1117         if not parsed_url.scheme:
1118             default_search = self._downloader.params.get('default_search')
1119             if default_search is None:
1120                 default_search = 'fixup_error'
1121
1122             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1123                 if '/' in url:
1124                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1125                     return self.url_result('http://' + url)
1126                 elif default_search != 'fixup_error':
1127                     if default_search == 'auto_warning':
1128                         if re.match(r'^(?:url|URL)$', url):
1129                             raise ExtractorError(
1130                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1131                                 expected=True)
1132                         else:
1133                             self._downloader.report_warning(
1134                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1135                     return self.url_result('ytsearch:' + url)
1136
1137             if default_search in ('error', 'fixup_error'):
1138                 raise ExtractorError(
1139                     '%r is not a valid URL. '
1140                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1141                     % (url, url), expected=True)
1142             else:
1143                 if ':' not in default_search:
1144                     default_search += ':'
1145                 return self.url_result(default_search + url)
1146
1147         url, smuggled_data = unsmuggle_url(url)
1148         force_videoid = None
1149         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1150         if smuggled_data and 'force_videoid' in smuggled_data:
1151             force_videoid = smuggled_data['force_videoid']
1152             video_id = force_videoid
1153         else:
1154             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1155
1156         self.to_screen('%s: Requesting header' % video_id)
1157
1158         head_req = HEADRequest(url)
1159         head_response = self._request_webpage(
1160             head_req, video_id,
1161             note=False, errnote='Could not send HEAD request to %s' % url,
1162             fatal=False)
1163
1164         if head_response is not False:
1165             # Check for redirect
1166             new_url = head_response.geturl()
1167             if url != new_url:
1168                 self.report_following_redirect(new_url)
1169                 if force_videoid:
1170                     new_url = smuggle_url(
1171                         new_url, {'force_videoid': force_videoid})
1172                 return self.url_result(new_url)
1173
1174         full_response = None
1175         if head_response is False:
1176             request = compat_urllib_request.Request(url)
1177             request.add_header('Accept-Encoding', '*')
1178             full_response = self._request_webpage(request, video_id)
1179             head_response = full_response
1180
1181         # Check for direct link to a video
1182         content_type = head_response.headers.get('Content-Type', '')
1183         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1184         if m:
1185             upload_date = unified_strdate(
1186                 head_response.headers.get('Last-Modified'))
1187             return {
1188                 'id': video_id,
1189                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1190                 'direct': True,
1191                 'formats': [{
1192                     'format_id': m.group('format_id'),
1193                     'url': url,
1194                     'vcodec': 'none' if m.group('type') == 'audio' else None
1195                 }],
1196                 'upload_date': upload_date,
1197             }
1198
1199         if not self._downloader.params.get('test', False) and not is_intentional:
1200             force = self._downloader.params.get('force_generic_extractor', False)
1201             self._downloader.report_warning(
1202                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1203
1204         if not full_response:
1205             request = compat_urllib_request.Request(url)
1206             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1207             # making it impossible to download only chunk of the file (yet we need only 512kB to
1208             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1209             # that will always result in downloading the whole file that is not desirable.
1210             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1211             # to accept raw bytes and being able to download only a chunk.
1212             # It may probably better to solve this by checking Content-Type for application/octet-stream
1213             # after HEAD request finishes, but not sure if we can rely on this.
1214             request.add_header('Accept-Encoding', '*')
1215             full_response = self._request_webpage(request, video_id)
1216
1217         # Maybe it's a direct link to a video?
1218         # Be careful not to download the whole thing!
1219         first_bytes = full_response.read(512)
1220         if not is_html(first_bytes):
1221             self._downloader.report_warning(
1222                 'URL could be a direct video link, returning it as such.')
1223             upload_date = unified_strdate(
1224                 head_response.headers.get('Last-Modified'))
1225             return {
1226                 'id': video_id,
1227                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1228                 'direct': True,
1229                 'url': url,
1230                 'upload_date': upload_date,
1231             }
1232
1233         webpage = self._webpage_read_content(
1234             full_response, url, video_id, prefix=first_bytes)
1235
1236         self.report_extraction(video_id)
1237
1238         # Is it an RSS feed, a SMIL file or a XSPF playlist?
1239         try:
1240             doc = parse_xml(webpage)
1241             if doc.tag == 'rss':
1242                 return self._extract_rss(url, video_id, doc)
1243             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1244                 return self._parse_smil(doc, url, video_id)
1245             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1246                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1247         except compat_xml_parse_error:
1248             pass
1249
1250         # Is it a Camtasia project?
1251         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1252         if camtasia_res is not None:
1253             return camtasia_res
1254
1255         # Sometimes embedded video player is hidden behind percent encoding
1256         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1257         # Unescaping the whole page allows to handle those cases in a generic way
1258         webpage = compat_urllib_parse_unquote(webpage)
1259
1260         # it's tempting to parse this further, but you would
1261         # have to take into account all the variations like
1262         #   Video Title - Site Name
1263         #   Site Name | Video Title
1264         #   Video Title - Tagline | Site Name
1265         # and so on and so forth; it's just not practical
1266         video_title = self._html_search_regex(
1267             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1268             default='video')
1269
1270         # Try to detect age limit automatically
1271         age_limit = self._rta_search(webpage)
1272         # And then there are the jokers who advertise that they use RTA,
1273         # but actually don't.
1274         AGE_LIMIT_MARKERS = [
1275             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1276         ]
1277         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1278             age_limit = 18
1279
1280         # video uploader is domain name
1281         video_uploader = self._search_regex(
1282             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1283
1284         # Helper method
1285         def _playlist_from_matches(matches, getter=None, ie=None):
1286             urlrs = orderedSet(
1287                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1288                 for m in matches)
1289             return self.playlist_result(
1290                 urlrs, playlist_id=video_id, playlist_title=video_title)
1291
1292         # Look for BrightCove:
1293         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1294         if bc_urls:
1295             self.to_screen('Brightcove video detected.')
1296             entries = [{
1297                 '_type': 'url',
1298                 'url': smuggle_url(bc_url, {'Referer': url}),
1299                 'ie_key': 'Brightcove'
1300             } for bc_url in bc_urls]
1301
1302             return {
1303                 '_type': 'playlist',
1304                 'title': video_title,
1305                 'id': video_id,
1306                 'entries': entries,
1307             }
1308
1309         # Look for embedded rtl.nl player
1310         matches = re.findall(
1311             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1312             webpage)
1313         if matches:
1314             return _playlist_from_matches(matches, ie='RtlNl')
1315
1316         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1317         if vimeo_url is not None:
1318             return self.url_result(vimeo_url)
1319
1320         vid_me_embed_url = self._search_regex(
1321             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1322             webpage, 'vid.me embed', default=None)
1323         if vid_me_embed_url is not None:
1324             return self.url_result(vid_me_embed_url, 'Vidme')
1325
1326         # Look for embedded YouTube player
1327         matches = re.findall(r'''(?x)
1328             (?:
1329                 <iframe[^>]+?src=|
1330                 data-video-url=|
1331                 <embed[^>]+?src=|
1332                 embedSWF\(?:\s*|
1333                 new\s+SWFObject\(
1334             )
1335             (["\'])
1336                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1337                 (?:embed|v|p)/.+?)
1338             \1''', webpage)
1339         if matches:
1340             return _playlist_from_matches(
1341                 matches, lambda m: unescapeHTML(m[1]))
1342
1343         # Look for lazyYT YouTube embed
1344         matches = re.findall(
1345             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1346         if matches:
1347             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1348
1349         # Look for embedded Dailymotion player
1350         matches = re.findall(
1351             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1352         if matches:
1353             return _playlist_from_matches(
1354                 matches, lambda m: unescapeHTML(m[1]))
1355
1356         # Look for embedded Dailymotion playlist player (#3822)
1357         m = re.search(
1358             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1359         if m:
1360             playlists = re.findall(
1361                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1362             if playlists:
1363                 return _playlist_from_matches(
1364                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1365
1366         # Look for embedded Wistia player
1367         match = re.search(
1368             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1369         if match:
1370             embed_url = self._proto_relative_url(
1371                 unescapeHTML(match.group('url')))
1372             return {
1373                 '_type': 'url_transparent',
1374                 'url': embed_url,
1375                 'ie_key': 'Wistia',
1376                 'uploader': video_uploader,
1377                 'title': video_title,
1378                 'id': video_id,
1379             }
1380
1381         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1382         if match:
1383             return {
1384                 '_type': 'url_transparent',
1385                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1386                 'ie_key': 'Wistia',
1387                 'uploader': video_uploader,
1388                 'title': video_title,
1389                 'id': match.group('id')
1390             }
1391
1392         # Look for embedded blip.tv player
1393         bliptv_url = BlipTVIE._extract_url(webpage)
1394         if bliptv_url:
1395             return self.url_result(bliptv_url, 'BlipTV')
1396
1397         # Look for SVT player
1398         svt_url = SVTIE._extract_url(webpage)
1399         if svt_url:
1400             return self.url_result(svt_url, 'SVT')
1401
1402         # Look for embedded condenast player
1403         matches = re.findall(
1404             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1405             webpage)
1406         if matches:
1407             return {
1408                 '_type': 'playlist',
1409                 'entries': [{
1410                     '_type': 'url',
1411                     'ie_key': 'CondeNast',
1412                     'url': ma,
1413                 } for ma in matches],
1414                 'title': video_title,
1415                 'id': video_id,
1416             }
1417
1418         # Look for Bandcamp pages with custom domain
1419         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1420         if mobj is not None:
1421             burl = unescapeHTML(mobj.group(1))
1422             # Don't set the extractor because it can be a track url or an album
1423             return self.url_result(burl)
1424
1425         # Look for embedded Vevo player
1426         mobj = re.search(
1427             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1428         if mobj is not None:
1429             return self.url_result(mobj.group('url'))
1430
1431         # Look for embedded Viddler player
1432         mobj = re.search(
1433             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1434             webpage)
1435         if mobj is not None:
1436             return self.url_result(mobj.group('url'))
1437
1438         # Look for NYTimes player
1439         mobj = re.search(
1440             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1441             webpage)
1442         if mobj is not None:
1443             return self.url_result(mobj.group('url'))
1444
1445         # Look for Libsyn player
1446         mobj = re.search(
1447             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1448         if mobj is not None:
1449             return self.url_result(mobj.group('url'))
1450
1451         # Look for Ooyala videos
1452         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1453                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1454                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1455                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1456         if mobj is not None:
1457             return OoyalaIE._build_url_result(mobj.group('ec'))
1458
1459         # Look for multiple Ooyala embeds on SBN network websites
1460         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1461         if mobj is not None:
1462             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1463             if embeds:
1464                 return _playlist_from_matches(
1465                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1466
1467         # Look for Aparat videos
1468         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1469         if mobj is not None:
1470             return self.url_result(mobj.group(1), 'Aparat')
1471
1472         # Look for MPORA videos
1473         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1474         if mobj is not None:
1475             return self.url_result(mobj.group(1), 'Mpora')
1476
1477         # Look for embedded NovaMov-based player
1478         mobj = re.search(
1479             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1480                     (?P<url>http://(?:(?:embed|www)\.)?
1481                         (?:novamov\.com|
1482                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1483                            videoweed\.(?:es|com)|
1484                            movshare\.(?:net|sx|ag)|
1485                            divxstage\.(?:eu|net|ch|co|at|ag))
1486                         /embed\.php.+?)\1''', webpage)
1487         if mobj is not None:
1488             return self.url_result(mobj.group('url'))
1489
1490         # Look for embedded Facebook player
1491         mobj = re.search(
1492             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1493         if mobj is not None:
1494             return self.url_result(mobj.group('url'), 'Facebook')
1495
1496         # Look for embedded VK player
1497         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1498         if mobj is not None:
1499             return self.url_result(mobj.group('url'), 'VK')
1500
1501         # Look for embedded ivi player
1502         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1503         if mobj is not None:
1504             return self.url_result(mobj.group('url'), 'Ivi')
1505
1506         # Look for embedded Huffington Post player
1507         mobj = re.search(
1508             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1509         if mobj is not None:
1510             return self.url_result(mobj.group('url'), 'HuffPost')
1511
1512         # Look for embed.ly
1513         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1514         if mobj is not None:
1515             return self.url_result(mobj.group('url'))
1516         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1517         if mobj is not None:
1518             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1519
1520         # Look for funnyordie embed
1521         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1522         if matches:
1523             return _playlist_from_matches(
1524                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1525
1526         # Look for BBC iPlayer embed
1527         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1528         if matches:
1529             return _playlist_from_matches(matches, ie='BBCCoUk')
1530
1531         # Look for embedded RUTV player
1532         rutv_url = RUTVIE._extract_url(webpage)
1533         if rutv_url:
1534             return self.url_result(rutv_url, 'RUTV')
1535
1536         # Look for embedded TVC player
1537         tvc_url = TVCIE._extract_url(webpage)
1538         if tvc_url:
1539             return self.url_result(tvc_url, 'TVC')
1540
1541         # Look for embedded SportBox player
1542         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1543         if sportbox_urls:
1544             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1545
1546         # Look for embedded PornHub player
1547         pornhub_url = PornHubIE._extract_url(webpage)
1548         if pornhub_url:
1549             return self.url_result(pornhub_url, 'PornHub')
1550
1551         # Look for embedded XHamster player
1552         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1553         if xhamster_urls:
1554             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1555
1556         # Look for embedded Tvigle player
1557         mobj = re.search(
1558             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1559         if mobj is not None:
1560             return self.url_result(mobj.group('url'), 'Tvigle')
1561
1562         # Look for embedded TED player
1563         mobj = re.search(
1564             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1565         if mobj is not None:
1566             return self.url_result(mobj.group('url'), 'TED')
1567
1568         # Look for embedded Ustream videos
1569         mobj = re.search(
1570             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1571         if mobj is not None:
1572             return self.url_result(mobj.group('url'), 'Ustream')
1573
1574         # Look for embedded arte.tv player
1575         mobj = re.search(
1576             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1577             webpage)
1578         if mobj is not None:
1579             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1580
1581         # Look for embedded francetv player
1582         mobj = re.search(
1583             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1584             webpage)
1585         if mobj is not None:
1586             return self.url_result(mobj.group('url'))
1587
1588         # Look for embedded smotri.com player
1589         smotri_url = SmotriIE._extract_url(webpage)
1590         if smotri_url:
1591             return self.url_result(smotri_url, 'Smotri')
1592
1593         # Look for embedded Myvi.ru player
1594         myvi_url = MyviIE._extract_url(webpage)
1595         if myvi_url:
1596             return self.url_result(myvi_url)
1597
        # Look for embedded soundcloud player
1599         mobj = re.search(
1600             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1601             webpage)
1602         if mobj is not None:
1603             url = unescapeHTML(mobj.group('url'))
1604             return self.url_result(url)
1605
1606         # Look for embedded vulture.com player
1607         mobj = re.search(
1608             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1609             webpage)
1610         if mobj is not None:
1611             url = unescapeHTML(mobj.group('url'))
1612             return self.url_result(url, ie='Vulture')
1613
1614         # Look for embedded mtvservices player
1615         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1616         if mtvservices_url:
1617             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1618
1619         # Look for embedded yahoo player
1620         mobj = re.search(
1621             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1622             webpage)
1623         if mobj is not None:
1624             return self.url_result(mobj.group('url'), 'Yahoo')
1625
1626         # Look for embedded sbs.com.au player
1627         mobj = re.search(
1628             r'''(?x)
1629             (?:
1630                 <meta\s+property="og:video"\s+content=|
1631                 <iframe[^>]+?src=
1632             )
1633             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1634             webpage)
1635         if mobj is not None:
1636             return self.url_result(mobj.group('url'), 'SBS')
1637
1638         # Look for embedded Cinchcast player
1639         mobj = re.search(
1640             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1641             webpage)
1642         if mobj is not None:
1643             return self.url_result(mobj.group('url'), 'Cinchcast')
1644
1645         mobj = re.search(
1646             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1647             webpage)
1648         if not mobj:
1649             mobj = re.search(
1650                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1651                 webpage)
1652         if mobj is not None:
1653             return self.url_result(mobj.group('url'), 'MLB')
1654
1655         mobj = re.search(
1656             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1657             webpage)
1658         if mobj is not None:
1659             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1660
1661         mobj = re.search(
1662             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1663             webpage)
1664         if mobj is not None:
1665             return self.url_result(mobj.group('url'), 'Livestream')
1666
1667         # Look for Zapiks embed
1668         mobj = re.search(
1669             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1670         if mobj is not None:
1671             return self.url_result(mobj.group('url'), 'Zapiks')
1672
1673         # Look for Kaltura embeds
1674         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
1675                 re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
1676         if mobj is not None:
1677             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1678
1679         # Look for Eagle.Platform embeds
1680         mobj = re.search(
1681             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1682         if mobj is not None:
1683             return self.url_result(mobj.group('url'), 'EaglePlatform')
1684
1685         # Look for ClipYou (uses Eagle.Platform) embeds
1686         mobj = re.search(
1687             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1688         if mobj is not None:
1689             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1690
1691         # Look for Pladform embeds
1692         mobj = re.search(
1693             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1694         if mobj is not None:
1695             return self.url_result(mobj.group('url'), 'Pladform')
1696
1697         # Look for Playwire embeds
1698         mobj = re.search(
1699             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1700         if mobj is not None:
1701             return self.url_result(mobj.group('url'))
1702
1703         # Look for 5min embeds
1704         mobj = re.search(
1705             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1706         if mobj is not None:
1707             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1708
1709         # Look for Crooks and Liars embeds
1710         mobj = re.search(
1711             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1712         if mobj is not None:
1713             return self.url_result(mobj.group('url'))
1714
1715         # Look for NBC Sports VPlayer embeds
1716         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1717         if nbc_sports_url:
1718             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1719
1720         # Look for UDN embeds
1721         mobj = re.search(
1722             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1723         if mobj is not None:
1724             return self.url_result(
1725                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1726
1727         # Look for Senate ISVP iframe
1728         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1729         if senate_isvp_url:
1730             return self.url_result(senate_isvp_url, 'SenateISVP')
1731
1732         # Look for Dailymotion Cloud videos
1733         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1734         if dmcloud_url:
1735             return self.url_result(dmcloud_url, 'DailymotionCloud')
1736
1737         # Look for OnionStudios embeds
1738         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1739         if onionstudios_url:
1740             return self.url_result(onionstudios_url)
1741
1742         # Look for SnagFilms embeds
1743         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1744         if snagfilms_url:
1745             return self.url_result(snagfilms_url)
1746
1747         # Look for ScreenwaveMedia embeds
1748         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
1749         if mobj is not None:
1750             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
1751
1752         # Look for AdobeTVVideo embeds
1753         mobj = re.search(
1754             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1755             webpage)
1756         if mobj is not None:
1757             return self.url_result(
1758                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1759                 'AdobeTVVideo')
1760
1761         def check_video(vurl):
1762             if YoutubeIE.suitable(vurl):
1763                 return True
1764             vpath = compat_urlparse.urlparse(vurl).path
1765             vext = determine_ext(vpath)
1766             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1767
1768         def filter_video(urls):
1769             return list(filter(check_video, urls))
1770
1771         # Start with something easy: JW Player in SWFObject
1772         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1773         if not found:
1774             # Look for gorilla-vid style embedding
1775             found = filter_video(re.findall(r'''(?sx)
1776                 (?:
1777                     jw_plugins|
1778                     JWPlayerOptions|
1779                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1780                 )
1781                 .*?
1782                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1783         if not found:
1784             # Broaden the search a little bit
1785             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1786         if not found:
1787             # Broaden the findall a little bit: JWPlayer JS loader
1788             found = filter_video(re.findall(
1789                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1790         if not found:
1791             # Flow player
1792             found = filter_video(re.findall(r'''(?xs)
1793                 flowplayer\("[^"]+",\s*
1794                     \{[^}]+?\}\s*,
1795                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1796                         ["']?url["']?\s*:\s*["']([^"']+)["']
1797             ''', webpage))
1798         if not found:
1799             # Cinerama player
1800             found = re.findall(
1801                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1802         if not found:
1803             # Try to find twitter cards info
1804             found = filter_video(re.findall(
1805                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1806         if not found:
1807             # We look for Open Graph info:
            # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
1809             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1810             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1811             if m_video_type is not None:
1812                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1813         if not found:
1814             # HTML5 video
1815             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1816         if not found:
1817             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1818             found = re.search(
1819                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1820                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1821                 webpage)
1822             if not found:
1823                 # Look also in Refresh HTTP header
1824                 refresh_header = head_response.headers.get('Refresh')
1825                 if refresh_header:
1826                     # In python 2 response HTTP headers are bytestrings
1827                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
1828                         refresh_header = refresh_header.decode('iso-8859-1')
1829                     found = re.search(REDIRECT_REGEX, refresh_header)
1830             if found:
1831                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
1832                 self.report_following_redirect(new_url)
1833                 return {
1834                     '_type': 'url',
1835                     'url': new_url,
1836                 }
1837         if not found:
1838             raise UnsupportedError(url)
1839
1840         entries = []
1841         for video_url in found:
1842             video_url = compat_urlparse.urljoin(url, video_url)
1843             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
1844
1845             # Sometimes, jwplayer extraction will result in a YouTube URL
1846             if YoutubeIE.suitable(video_url):
1847                 entries.append(self.url_result(video_url, 'Youtube'))
1848                 continue
1849
1850             # here's a fun little line of code for you:
1851             video_id = os.path.splitext(video_id)[0]
1852
1853             ext = determine_ext(video_url)
1854             if ext == 'smil':
1855                 entries.append({
1856                     'id': video_id,
1857                     'formats': self._extract_smil_formats(video_url, video_id),
1858                     'uploader': video_uploader,
1859                     'title': video_title,
1860                     'age_limit': age_limit,
1861                 })
1862             elif ext == 'xspf':
1863                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
1864             else:
1865                 entries.append({
1866                     'id': video_id,
1867                     'url': video_url,
1868                     'uploader': video_uploader,
1869                     'title': video_title,
1870                     'age_limit': age_limit,
1871                 })
1872
1873         if len(entries) == 1:
1874             return entries[0]
1875         else:
1876             for num, e in enumerate(entries, start=1):
1877                 # 'url' results don't have a title
1878                 if e.get('title') is not None:
1879                     e['title'] = '%s (%d)' % (e['title'], num)
1880             return {
1881                 '_type': 'playlist',
1882                 'entries': entries,
1883             }