[extractor/generic] Use compat_urllib_parse_unquote for unquoting video_id and title...
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urllib_parse_unquote,
13     compat_urllib_request,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     parse_xml,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import BrightcoveIE
34 from .nbc import NBCSportsVPlayerIE
35 from .ooyala import OoyalaIE
36 from .rutv import RUTVIE
37 from .sportbox import SportBoxEmbedIE
38 from .smotri import SmotriIE
39 from .condenast import CondeNastIE
40 from .udn import UDNEmbedIE
41 from .senateisvp import SenateISVPIE
42 from .bliptv import BlipTVIE
43 from .svt import SVTIE
44
45
46 class GenericIE(InfoExtractor):
47     IE_DESC = 'Generic downloader that works on some sites'
48     _VALID_URL = r'.*'  # catch-all pattern: matches any string, so no URL is rejected by it
49     IE_NAME = 'generic'
50     _TESTS = [
51         {
52             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
53             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
54             'info_dict': {
55                 'id': '13601338388002',
56                 'ext': 'mp4',
57                 'uploader': 'www.hodiho.fr',
58                 'title': 'R\u00e9gis plante sa Jeep',
59             }
60         },
61         # bandcamp page with custom domain
62         {
63             'add_ie': ['Bandcamp'],
64             'url': 'http://bronyrock.com/track/the-pony-mash',
65             'info_dict': {
66                 'id': '3235767654',
67                 'ext': 'mp3',
68                 'title': 'The Pony Mash',
69                 'uploader': 'M_Pallante',
70             },
71             'skip': 'There is a limit of 200 free downloads / month for the test song',
72         },
73         # embedded brightcove video
74         # it also tests brightcove videos that need to set the 'Referer' in the
75         # http requests
76         {
77             'add_ie': ['Brightcove'],
78             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
79             'info_dict': {
80                 'id': '2765128793001',
81                 'ext': 'mp4',
82                 'title': 'Le cours de bourse : l’analyse technique',
83                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
84                 'uploader': 'BFM BUSINESS',
85             },
86             'params': {
87                 'skip_download': True,
88             },
89         },
90         {
91             # https://github.com/rg3/youtube-dl/issues/2253
92             'url': 'http://bcove.me/i6nfkrc3',
93             'md5': '0ba9446db037002366bab3b3eb30c88c',
94             'info_dict': {
95                 'id': '3101154703001',
96                 'ext': 'mp4',
97                 'title': 'Still no power',
98                 'uploader': 'thestar.com',
99                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
100             },
101             'add_ie': ['Brightcove'],
102         },
103         {
104             'url': 'http://www.championat.com/video/football/v/87/87499.html',
105             'md5': 'fb973ecf6e4a78a67453647444222983',
106             'info_dict': {
107                 'id': '3414141473001',
108                 'ext': 'mp4',
109                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
110                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
111                 'uploader': 'Championat',
112             },
113         },
114         {
115             # https://github.com/rg3/youtube-dl/issues/3541
116             'add_ie': ['Brightcove'],
117             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
118             'info_dict': {
119                 'id': '3866516442001',
120                 'ext': 'mp4',
121                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
122                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
123                 'uploader': 'SBS Broadcasting',
124             },
125             'skip': 'Restricted to Netherlands',
126             'params': {
127                 'skip_download': True,  # m3u8 download
128             },
129         },
130         # Direct link to a video
131         {
132             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
133             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
134             'info_dict': {
135                 'id': 'trailer',
136                 'ext': 'mp4',
137                 'title': 'trailer',
138                 'upload_date': '20100513',
139             }
140         },
141         # ooyala video
142         {
143             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
144             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
145             'info_dict': {
146                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
147                 'ext': 'mp4',
148                 'title': '2cc213299525360.mov',  # that's what we get
149             },
150             'add_ie': ['Ooyala'],
151         },
152         # multiple ooyala embeds on SBN network websites
153         {
154             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
155             'info_dict': {
156                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
157                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
158             },
159             'playlist_mincount': 3,
160             'params': {
161                 'skip_download': True,
162             },
163             'add_ie': ['Ooyala'],
164         },
165         # google redirect
166         {
167             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
168             'info_dict': {
169                 'id': 'cmQHVoWB5FY',
170                 'ext': 'mp4',
171                 'upload_date': '20130224',
172                 'uploader_id': 'TheVerge',
173                 'description': 're:^Chris Ziegler takes a look at the\.*',
174                 'uploader': 'The Verge',
175                 'title': 'First Firefox OS phones side-by-side',
176             },
177             'params': {
178                 'skip_download': False,
179             }
180         },
181         # embed.ly video
182         {
183             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
184             'info_dict': {
185                 'id': '9ODmcdjQcHQ',
186                 'ext': 'mp4',
187                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
188                 'upload_date': '20140225',
189                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
190                 'uploader': 'Tested',
191                 'uploader_id': 'testedcom',
192             },
193             # No need to test YoutubeIE here
194             'params': {
195                 'skip_download': True,
196             },
197         },
198         # funnyordie embed
199         {
200             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
201             'info_dict': {
202                 'id': '18e820ec3f',
203                 'ext': 'mp4',
204                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
205                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
206             },
207         },
208         # BBC iPlayer embeds
209         {
210             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
211             'info_dict': {
212                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
213             },
214             'playlist_mincount': 18,
215         },
216         # RUTV embed
217         {
218             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
219             'info_dict': {
220                 'id': '776940',
221                 'ext': 'mp4',
222                 'title': 'Охотское море стало целиком российским',
223                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
224             },
225             'params': {
226                 # m3u8 download
227                 'skip_download': True,
228             },
229         },
230         # SportBox embed
231         {
232             'url': 'http://www.vestifinance.ru/articles/25753',
233             'info_dict': {
234                 'id': '25753',
235                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
236             },
237             'playlist': [{
238                 'info_dict': {
239                     'id': '370908',
240                     'title': 'Госзаказ. День 3',
241                     'ext': 'mp4',
242                 }
243             }, {
244                 'info_dict': {
245                     'id': '370905',
246                     'title': 'Госзаказ. День 2',
247                     'ext': 'mp4',
248                 }
249             }, {
250                 'info_dict': {
251                     'id': '370902',
252                     'title': 'Госзаказ. День 1',
253                     'ext': 'mp4',
254                 }
255             }],
256             'params': {
257                 # m3u8 download
258                 'skip_download': True,
259             },
260         },
261         # Embedded TED video
262         {
263             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
264             'md5': '65fdff94098e4a607385a60c5177c638',
265             'info_dict': {
266                 'id': '1969',
267                 'ext': 'mp4',
268                 'title': 'Hidden miracles of the natural world',
269                 'uploader': 'Louie Schwartzberg',
270                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
271             }
272         },
273         # Embedded Ustream video
274         {
275             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
276             'md5': '27b99cdb639c9b12a79bca876a073417',
277             'info_dict': {
278                 'id': '45734260',
279                 'ext': 'flv',
280                 'uploader': 'AU SPA:  The NSA and Privacy',
281                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
282             }
283         },
284         # nowvideo embed hidden behind percent encoding
285         {
286             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
287             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
288             'info_dict': {
289                 'id': '06e53103ca9aa',
290                 'ext': 'flv',
291                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
292                 'description': 'No description',
293             },
294         },
295         # arte embed
296         {
297             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
298             'md5': '7653032cbb25bf6c80d80f217055fa43',
299             'info_dict': {
300                 'id': '048195-004_PLUS7-F',
301                 'ext': 'flv',
302                 'title': 'X:enius',
303                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
304                 'upload_date': '20140320',
305             },
306             'params': {
307                 'skip_download': 'Requires rtmpdump'
308             }
309         },
310         # Condé Nast embed
311         {
312             'url': 'http://www.wired.com/2014/04/honda-asimo/',
313             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
314             'info_dict': {
315                 'id': '53501be369702d3275860000',
316                 'ext': 'mp4',
317                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
318             }
319         },
320         # Dailymotion embed
321         {
322             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
323             'md5': '441aeeb82eb72c422c7f14ec533999cd',
324             'info_dict': {
325                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
326                 'ext': 'mp4',
327                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
328                 'uploader': 'Spi0n',
329             },
330             'add_ie': ['Dailymotion'],
331         },
332         # YouTube embed
333         {
334             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
335             'info_dict': {
336                 'id': 'FXRb4ykk4S0',
337                 'ext': 'mp4',
338                 'title': 'The NBL Auction 2014',
339                 'uploader': 'BADMINTON England',
340                 'uploader_id': 'BADMINTONEvents',
341                 'upload_date': '20140603',
342                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
343             },
344             'add_ie': ['Youtube'],
345             'params': {
346                 'skip_download': True,
347             }
348         },
349         # MTVServices embed
350         {
351             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
352             'md5': '35727f82f58c76d996fc188f9755b0d5',
353             'info_dict': {
354                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
355                 'ext': 'mp4',
356                 'title': 'Review',
357                 'description': 'Mario\'s life in the fast lane has never looked so good.',
358             },
359         },
360         # YouTube embed via <data-embed-url="">
361         {
362             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
363             'info_dict': {
364                 'id': '4vAffPZIT44',
365                 'ext': 'mp4',
366                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
367                 'uploader': 'Gameloft',
368                 'uploader_id': 'gameloft',
369                 'upload_date': '20140828',
370                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
371             },
372             'params': {
373                 'skip_download': True,
374             }
375         },
376         # Camtasia studio
377         {
378             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
379             'playlist': [{
380                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
381                 'info_dict': {
382                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
383                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
384                     'ext': 'flv',
385                     'duration': 2235.90,
386                 }
387             }, {
388                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
389                 'info_dict': {
390                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
391                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
392                     'ext': 'flv',
393                     'duration': 2235.93,
394                 }
395             }],
396             'info_dict': {
397                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
398             }
399         },
400         # Flowplayer
401         {
402             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
403             'md5': '9d65602bf31c6e20014319c7d07fba27',
404             'info_dict': {
405                 'id': '5123ea6d5e5a7',
406                 'ext': 'mp4',
407                 'age_limit': 18,
408                 'uploader': 'www.handjobhub.com',
409                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
410             }
411         },
412         # RSS feed
413         {
414             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
415             'info_dict': {
416                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
417                 'title': 'Zero Punctuation',
418                 'description': 're:.*groundbreaking video review series.*'
419             },
420             'playlist_mincount': 11,
421         },
422         # Multiple brightcove videos
423         # https://github.com/rg3/youtube-dl/issues/2283
424         {
425             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
426             'info_dict': {
427                 'id': 'always-never',
428                 'title': 'Always / Never - The New Yorker',
429             },
430             'playlist_count': 3,
431             'params': {
432                 'extract_flat': False,
433                 'skip_download': True,
434             }
435         },
436         # MLB embed
437         {
438             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
439             'md5': '96f09a37e44da40dd083e12d9a683327',
440             'info_dict': {
441                 'id': '33322633',
442                 'ext': 'mp4',
443                 'title': 'Ump changes call to ball',
444                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
445                 'duration': 48,
446                 'timestamp': 1401537900,
447                 'upload_date': '20140531',
448                 'thumbnail': 're:^https?://.*\.jpg$',
449             },
450         },
451         # Wistia embed
452         {
453             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
454             'md5': '8788b683c777a5cf25621eaf286d0c23',
455             'info_dict': {
456                 'id': '1cfaf6b7ea',
457                 'ext': 'mov',
458                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
459                 'duration': 643.0,
460                 'filesize': 182808282,
461                 'uploader': 'education-portal.com',
462             },
463         },
464         {
465             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
466             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
467             'info_dict': {
468                 'id': 'uxjb0lwrcz',
469                 'ext': 'mp4',
470                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
471                 'duration': 1715.0,
472                 'uploader': 'thoughtworks.wistia.com',
473             },
474         },
475         # Direct download with broken HEAD
476         {
477             'url': 'http://ai-radio.org:8000/radio.opus',
478             'info_dict': {
479                 'id': 'radio',
480                 'ext': 'opus',
481                 'title': 'radio',
482             },
483             'params': {
484                 'skip_download': True,  # infinite live stream
485             },
486             'expected_warnings': [
487                 r'501.*Not Implemented'
488             ],
489         },
490         # Soundcloud embed
491         {
492             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
493             'info_dict': {
494                 'id': '174391317',
495                 'ext': 'mp3',
496                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
497                 'uploader': 'Sophos Security',
498                 'title': 'Chet Chat 171 - Oct 29, 2014',
499                 'upload_date': '20141029',
500             }
501         },
502         # Livestream embed
503         {
504             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
505             'info_dict': {
506                 'id': '67864563',
507                 'ext': 'flv',
508                 'upload_date': '20141112',
509                 'title': 'Rosetta #CometLanding webcast HL 10',
510             }
511         },
512         # LazyYT
513         {
514             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
515             'info_dict': {
516                 'id': '1986',
517                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
518             },
519             'playlist_mincount': 2,
520         },
521         # Direct link with incorrect MIME type
522         {
523             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
524             'md5': '4ccbebe5f36706d85221f204d7eb5913',
525             'info_dict': {
526                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
527                 'id': '5_Lennart_Poettering_-_Systemd',
528                 'ext': 'webm',
529                 'title': '5_Lennart_Poettering_-_Systemd',
530                 'upload_date': '20141120',
531             },
532             'expected_warnings': [
533                 'URL could be a direct video link, returning it as such.'
534             ]
535         },
536         # Cinchcast embed
537         {
538             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
539             'info_dict': {
540                 'id': '7141703',
541                 'ext': 'mp3',
542                 'upload_date': '20141126',
543                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
544             }
545         },
546         # Cinerama player
547         {
548             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
549             'info_dict': {
550                 'id': '730m_DandD_1901_512k',
551                 'ext': 'mp4',
552                 'uploader': 'www.abc.net.au',
553                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
554             }
555         },
556         # embedded viddler video
557         {
558             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
559             'info_dict': {
560                 'id': '4d03aad9',
561                 'ext': 'mp4',
562                 'uploader': 'deadspin',
563                 'title': 'WALL-TO-GORTAT',
564                 'timestamp': 1422285291,
565                 'upload_date': '20150126',
566             },
567             'add_ie': ['Viddler'],
568         },
569         # Libsyn embed
570         {
571             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
572             'info_dict': {
573                 'id': '3377616',
574                 'ext': 'mp3',
575                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
576                 'description': 'md5:601cb790edd05908957dae8aaa866465',
577                 'upload_date': '20150220',
578             },
579         },
580         # jwplayer YouTube
581         {
582             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
583             'info_dict': {
584                 'id': 'Mrj4DVp2zeA',
585                 'ext': 'mp4',
586                 'upload_date': '20150212',
587                 'uploader': 'The National Archives UK',
588                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
589                 'uploader_id': 'NationalArchives08',
590                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
591             },
592         },
593         # rtl.nl embed
594         {
595             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
596             'playlist_mincount': 5,
597             'info_dict': {
598                 'id': 'aanslagen-kopenhagen',
599                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
600             }
601         },
602         # Zapiks embed
603         {
604             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
605             'info_dict': {
606                 'id': '118046',
607                 'ext': 'mp4',
608                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
609             }
610         },
611         # Kaltura embed
612         {
613             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
614             'info_dict': {
615                 'id': '1_eergr3h1',
616                 'ext': 'mp4',
617                 'upload_date': '20150226',
618                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
619                 'timestamp': int,
620                 'title': 'John Carlson Postgame 2/25/15',
621             },
622         },
623         # Eagle.Platform embed (generic URL)
624         {
625             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
626             'info_dict': {
627                 'id': '227304',
628                 'ext': 'mp4',
629                 'title': 'Навальный вышел на свободу',
630                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
631                 'thumbnail': 're:^https?://.*\.jpg$',
632                 'duration': 87,
633                 'view_count': int,
634                 'age_limit': 0,
635             },
636         },
637         # ClipYou (Eagle.Platform) embed (custom URL)
638         {
639             'url': 'http://muz-tv.ru/play/7129/',
640             'info_dict': {
641                 'id': '12820',
642                 'ext': 'mp4',
643                 'title': "'O Sole Mio",
644                 'thumbnail': 're:^https?://.*\.jpg$',
645                 'duration': 216,
646                 'view_count': int,
647             },
648         },
649         # Pladform embed
650         {
651             'url': 'http://muz-tv.ru/kinozal/view/7400/',
652             'info_dict': {
653                 'id': '100183293',
654                 'ext': 'mp4',
655                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
656                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
657                 'thumbnail': 're:^https?://.*\.jpg$',
658                 'duration': 694,
659                 'age_limit': 0,
660             },
661         },
662         # Playwire embed
663         {
664             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
665             'info_dict': {
666                 'id': '3519514',
667                 'ext': 'mp4',
668                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
669                 'thumbnail': 're:^https?://.*\.png$',
670                 'duration': 45.115,
671             },
672         },
673         # 5min embed
674         {
675             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
676             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
677             'info_dict': {
678                 'id': '518726732',
679                 'ext': 'mp4',
680                 'title': 'Facebook Creates "On This Day" | Crunch Report',
681             },
682         },
683         # SVT embed
684         {
685             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
686             'info_dict': {
687                 'id': '2900353',
688                 'ext': 'flv',
689                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
690                 'duration': 27,
691                 'age_limit': 0,
692             },
693         },
694         # RSS feed with enclosure
695         {
696             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
697             'info_dict': {
698                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
699                 'ext': 'm4v',
700                 'upload_date': '20150228',
701                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
702             }
703         },
704         # Crooks and Liars embed
705         {
706             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
707             'info_dict': {
708                 'id': '8RUoRhRi',
709                 'ext': 'mp4',
710                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
711                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
712                 'timestamp': 1428207000,
713                 'upload_date': '20150405',
714                 'uploader': 'Heather',
715             },
716         },
717         # Crooks and Liars external embed
718         {
719             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
720             'info_dict': {
721                 'id': 'MTE3MjUtMzQ2MzA',
722                 'ext': 'mp4',
723                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
724                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
725                 'timestamp': 1265032391,
726                 'upload_date': '20100201',
727                 'uploader': 'Heather',
728             },
729         },
730         # NBC Sports vplayer embed
731         {
732             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
733             'info_dict': {
734                 'id': 'ln7x1qSThw4k',
735                 'ext': 'flv',
736                 'title': "PFT Live: New leader in the 'new-look' defense",
737                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
738             },
739         },
740         # UDN embed
741         {
742             'url': 'http://www.udn.com/news/story/7314/822787',
743             'md5': 'fd2060e988c326991037b9aff9df21a6',
744             'info_dict': {
745                 'id': '300346',
746                 'ext': 'mp4',
747                 'title': '中一中男師變性 全校師生力挺',
748                 'thumbnail': 're:^https?://.*\.jpg$',
749             }
750         },
751         # Ooyala embed
752         {
753             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
754             'info_dict': {
755                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
756                 'ext': 'mp4',
757                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
758                 'title': 'This is what separates the Excel masters from the wannabes',
759             },
760             'params': {
761                 # m3u8 downloads
762                 'skip_download': True,
763             }
764         },
765         # Contains a SMIL manifest
766         {
767             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
768             'info_dict': {
769                 'id': 'file',
770                 'ext': 'flv',
771                 'title': '+ Football: Lottery Champions League Europe',
772                 'uploader': 'www.telewebion.com',
773             },
774             'params': {
775                 # rtmpe downloads
776                 'skip_download': True,
777             }
778         }
779     ]
780
781     def report_following_redirect(self, new_url):
782         """Report information extraction."""
783         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
784
785     def _extract_rss(self, url, video_id, doc):
786         playlist_title = doc.find('./channel/title').text
787         playlist_desc_el = doc.find('./channel/description')
788         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
789
790         entries = []
791         for it in doc.findall('./channel/item'):
792             next_url = xpath_text(it, 'link', fatal=False)
793             if not next_url:
794                 enclosure_nodes = it.findall('./enclosure')
795                 for e in enclosure_nodes:
796                     next_url = e.attrib.get('url')
797                     if next_url:
798                         break
799
800             if not next_url:
801                 continue
802
803             entries.append({
804                 '_type': 'url',
805                 'url': next_url,
806                 'title': it.find('title').text,
807             })
808
809         return {
810             '_type': 'playlist',
811             'id': url,
812             'title': playlist_title,
813             'description': playlist_desc,
814             'entries': entries,
815         }
816
817     def _extract_camtasia(self, url, video_id, webpage):
818         """ Returns None if no camtasia video can be found. """
819
820         camtasia_cfg = self._search_regex(
821             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
822             webpage, 'camtasia configuration file', default=None)
823         if camtasia_cfg is None:
824             return None
825
826         title = self._html_search_meta('DC.title', webpage, fatal=True)
827
828         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
829         camtasia_cfg = self._download_xml(
830             camtasia_url, video_id,
831             note='Downloading camtasia configuration',
832             errnote='Failed to download camtasia configuration')
833         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
834
835         entries = []
836         for n in fileset_node.getchildren():
837             url_n = n.find('./uri')
838             if url_n is None:
839                 continue
840
841             entries.append({
842                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
843                 'title': '%s - %s' % (title, n.tag),
844                 'url': compat_urlparse.urljoin(url, url_n.text),
845                 'duration': float_or_none(n.find('./duration').text),
846             })
847
848         return {
849             '_type': 'playlist',
850             'entries': entries,
851             'title': title,
852         }
853
854     def _real_extract(self, url):
855         if url.startswith('//'):
856             return {
857                 '_type': 'url',
858                 'url': self.http_scheme() + url,
859             }
860
861         parsed_url = compat_urlparse.urlparse(url)
862         if not parsed_url.scheme:
863             default_search = self._downloader.params.get('default_search')
864             if default_search is None:
865                 default_search = 'fixup_error'
866
867             if default_search in ('auto', 'auto_warning', 'fixup_error'):
868                 if '/' in url:
869                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
870                     return self.url_result('http://' + url)
871                 elif default_search != 'fixup_error':
872                     if default_search == 'auto_warning':
873                         if re.match(r'^(?:url|URL)$', url):
874                             raise ExtractorError(
875                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
876                                 expected=True)
877                         else:
878                             self._downloader.report_warning(
879                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
880                     return self.url_result('ytsearch:' + url)
881
882             if default_search in ('error', 'fixup_error'):
883                 raise ExtractorError(
884                     '%r is not a valid URL. '
885                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
886                     % (url, url), expected=True)
887             else:
888                 if ':' not in default_search:
889                     default_search += ':'
890                 return self.url_result(default_search + url)
891
892         url, smuggled_data = unsmuggle_url(url)
893         force_videoid = None
894         is_intentional = smuggled_data and smuggled_data.get('to_generic')
895         if smuggled_data and 'force_videoid' in smuggled_data:
896             force_videoid = smuggled_data['force_videoid']
897             video_id = force_videoid
898         else:
899             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
900
901         self.to_screen('%s: Requesting header' % video_id)
902
903         head_req = HEADRequest(url)
904         head_response = self._request_webpage(
905             head_req, video_id,
906             note=False, errnote='Could not send HEAD request to %s' % url,
907             fatal=False)
908
909         if head_response is not False:
910             # Check for redirect
911             new_url = head_response.geturl()
912             if url != new_url:
913                 self.report_following_redirect(new_url)
914                 if force_videoid:
915                     new_url = smuggle_url(
916                         new_url, {'force_videoid': force_videoid})
917                 return self.url_result(new_url)
918
919         full_response = None
920         if head_response is False:
921             request = compat_urllib_request.Request(url)
922             request.add_header('Accept-Encoding', '*')
923             full_response = self._request_webpage(request, video_id)
924             head_response = full_response
925
926         # Check for direct link to a video
927         content_type = head_response.headers.get('Content-Type', '')
928         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
929         if m:
930             upload_date = unified_strdate(
931                 head_response.headers.get('Last-Modified'))
932             return {
933                 'id': video_id,
934                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
935                 'direct': True,
936                 'formats': [{
937                     'format_id': m.group('format_id'),
938                     'url': url,
939                     'vcodec': 'none' if m.group('type') == 'audio' else None
940                 }],
941                 'upload_date': upload_date,
942             }
943
944         if not self._downloader.params.get('test', False) and not is_intentional:
945             self._downloader.report_warning('Falling back on generic information extractor.')
946
947         if not full_response:
948             request = compat_urllib_request.Request(url)
949             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
950             # making it impossible to download only chunk of the file (yet we need only 512kB to
951             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
952             # that will always result in downloading the whole file that is not desirable.
953             # Therefore for extraction pass we have to override Accept-Encoding to any in order
954             # to accept raw bytes and being able to download only a chunk.
955             # It may probably be better to solve this by checking Content-Type for application/octet-stream
956             # after HEAD request finishes, but not sure if we can rely on this.
957             request.add_header('Accept-Encoding', '*')
958             full_response = self._request_webpage(request, video_id)
959
960         # Maybe it's a direct link to a video?
961         # Be careful not to download the whole thing!
962         first_bytes = full_response.read(512)
963         if not is_html(first_bytes):
964             self._downloader.report_warning(
965                 'URL could be a direct video link, returning it as such.')
966             upload_date = unified_strdate(
967                 head_response.headers.get('Last-Modified'))
968             return {
969                 'id': video_id,
970                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
971                 'direct': True,
972                 'url': url,
973                 'upload_date': upload_date,
974             }
975
976         webpage = self._webpage_read_content(
977             full_response, url, video_id, prefix=first_bytes)
978
979         self.report_extraction(video_id)
980
981         # Is it an RSS feed?
982         try:
983             doc = parse_xml(webpage)
984             if doc.tag == 'rss':
985                 return self._extract_rss(url, video_id, doc)
986         except compat_xml_parse_error:
987             pass
988
989         # Is it a Camtasia project?
990         camtasia_res = self._extract_camtasia(url, video_id, webpage)
991         if camtasia_res is not None:
992             return camtasia_res
993
994         # Sometimes embedded video player is hidden behind percent encoding
995         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
996         # Unescaping the whole page allows to handle those cases in a generic way
997         webpage = compat_urllib_parse.unquote(webpage)
998
999         # it's tempting to parse this further, but you would
1000         # have to take into account all the variations like
1001         #   Video Title - Site Name
1002         #   Site Name | Video Title
1003         #   Video Title - Tagline | Site Name
1004         # and so on and so forth; it's just not practical
1005         video_title = self._html_search_regex(
1006             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1007             default='video')
1008
1009         # Try to detect age limit automatically
1010         age_limit = self._rta_search(webpage)
1011         # And then there are the jokers who advertise that they use RTA,
1012         # but actually don't.
1013         AGE_LIMIT_MARKERS = [
1014             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1015         ]
1016         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1017             age_limit = 18
1018
1019         # video uploader is domain name
1020         video_uploader = self._search_regex(
1021             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1022
1023         # Helper method
1024         def _playlist_from_matches(matches, getter=None, ie=None):
1025             urlrs = orderedSet(
1026                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1027                 for m in matches)
1028             return self.playlist_result(
1029                 urlrs, playlist_id=video_id, playlist_title=video_title)
1030
1031         # Look for BrightCove:
1032         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1033         if bc_urls:
1034             self.to_screen('Brightcove video detected.')
1035             entries = [{
1036                 '_type': 'url',
1037                 'url': smuggle_url(bc_url, {'Referer': url}),
1038                 'ie_key': 'Brightcove'
1039             } for bc_url in bc_urls]
1040
1041             return {
1042                 '_type': 'playlist',
1043                 'title': video_title,
1044                 'id': video_id,
1045                 'entries': entries,
1046             }
1047
1048         # Look for embedded rtl.nl player
1049         matches = re.findall(
1050             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1051             webpage)
1052         if matches:
1053             return _playlist_from_matches(matches, ie='RtlNl')
1054
1055         # Look for embedded (iframe) Vimeo player
1056         mobj = re.search(
1057             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1058         if mobj:
1059             player_url = unescapeHTML(mobj.group('url'))
1060             surl = smuggle_url(player_url, {'Referer': url})
1061             return self.url_result(surl)
1062         # Look for embedded (swf embed) Vimeo player
1063         mobj = re.search(
1064             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1065         if mobj:
1066             return self.url_result(mobj.group(1))
1067
1068         # Look for embedded YouTube player
1069         matches = re.findall(r'''(?x)
1070             (?:
1071                 <iframe[^>]+?src=|
1072                 data-video-url=|
1073                 <embed[^>]+?src=|
1074                 embedSWF\(?:\s*|
1075                 new\s+SWFObject\(
1076             )
1077             (["\'])
1078                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1079                 (?:embed|v|p)/.+?)
1080             \1''', webpage)
1081         if matches:
1082             return _playlist_from_matches(
1083                 matches, lambda m: unescapeHTML(m[1]))
1084
1085         # Look for lazyYT YouTube embed
1086         matches = re.findall(
1087             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1088         if matches:
1089             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1090
1091         # Look for embedded Dailymotion player
1092         matches = re.findall(
1093             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1094         if matches:
1095             return _playlist_from_matches(
1096                 matches, lambda m: unescapeHTML(m[1]))
1097
1098         # Look for embedded Dailymotion playlist player (#3822)
1099         m = re.search(
1100             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1101         if m:
1102             playlists = re.findall(
1103                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1104             if playlists:
1105                 return _playlist_from_matches(
1106                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1107
1108         # Look for embedded Wistia player
1109         match = re.search(
1110             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1111         if match:
1112             embed_url = self._proto_relative_url(
1113                 unescapeHTML(match.group('url')))
1114             return {
1115                 '_type': 'url_transparent',
1116                 'url': embed_url,
1117                 'ie_key': 'Wistia',
1118                 'uploader': video_uploader,
1119                 'title': video_title,
1120                 'id': video_id,
1121             }
1122
1123         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1124         if match:
1125             return {
1126                 '_type': 'url_transparent',
1127                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1128                 'ie_key': 'Wistia',
1129                 'uploader': video_uploader,
1130                 'title': video_title,
1131                 'id': match.group('id')
1132             }
1133
1134         # Look for embedded blip.tv player
1135         bliptv_url = BlipTVIE._extract_url(webpage)
1136         if bliptv_url:
1137             return self.url_result(bliptv_url, 'BlipTV')
1138
1139         # Look for SVT player
1140         svt_url = SVTIE._extract_url(webpage)
1141         if svt_url:
1142             return self.url_result(svt_url, 'SVT')
1143
1144         # Look for embedded condenast player
1145         matches = re.findall(
1146             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1147             webpage)
1148         if matches:
1149             return {
1150                 '_type': 'playlist',
1151                 'entries': [{
1152                     '_type': 'url',
1153                     'ie_key': 'CondeNast',
1154                     'url': ma,
1155                 } for ma in matches],
1156                 'title': video_title,
1157                 'id': video_id,
1158             }
1159
1160         # Look for Bandcamp pages with custom domain
1161         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1162         if mobj is not None:
1163             burl = unescapeHTML(mobj.group(1))
1164             # Don't set the extractor because it can be a track url or an album
1165             return self.url_result(burl)
1166
1167         # Look for embedded Vevo player
1168         mobj = re.search(
1169             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1170         if mobj is not None:
1171             return self.url_result(mobj.group('url'))
1172
1173         # Look for embedded Viddler player
1174         mobj = re.search(
1175             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1176             webpage)
1177         if mobj is not None:
1178             return self.url_result(mobj.group('url'))
1179
1180         # Look for NYTimes player
1181         mobj = re.search(
1182             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1183             webpage)
1184         if mobj is not None:
1185             return self.url_result(mobj.group('url'))
1186
1187         # Look for Libsyn player
1188         mobj = re.search(
1189             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1190         if mobj is not None:
1191             return self.url_result(mobj.group('url'))
1192
1193         # Look for Ooyala videos
1194         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1195                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1196                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1197                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1198         if mobj is not None:
1199             return OoyalaIE._build_url_result(mobj.group('ec'))
1200
1201         # Look for multiple Ooyala embeds on SBN network websites
1202         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1203         if mobj is not None:
1204             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1205             if embeds:
1206                 return _playlist_from_matches(
1207                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1208
1209         # Look for Aparat videos
1210         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1211         if mobj is not None:
1212             return self.url_result(mobj.group(1), 'Aparat')
1213
1214         # Look for MPORA videos
1215         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1216         if mobj is not None:
1217             return self.url_result(mobj.group(1), 'Mpora')
1218
1219         # Look for embedded NovaMov-based player
1220         mobj = re.search(
1221             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1222                     (?P<url>http://(?:(?:embed|www)\.)?
1223                         (?:novamov\.com|
1224                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1225                            videoweed\.(?:es|com)|
1226                            movshare\.(?:net|sx|ag)|
1227                            divxstage\.(?:eu|net|ch|co|at|ag))
1228                         /embed\.php.+?)\1''', webpage)
1229         if mobj is not None:
1230             return self.url_result(mobj.group('url'))
1231
1232         # Look for embedded Facebook player
1233         mobj = re.search(
1234             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1235         if mobj is not None:
1236             return self.url_result(mobj.group('url'), 'Facebook')
1237
1238         # Look for embedded VK player
1239         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1240         if mobj is not None:
1241             return self.url_result(mobj.group('url'), 'VK')
1242
1243         # Look for embedded ivi player
1244         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1245         if mobj is not None:
1246             return self.url_result(mobj.group('url'), 'Ivi')
1247
1248         # Look for embedded Huffington Post player
1249         mobj = re.search(
1250             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1251         if mobj is not None:
1252             return self.url_result(mobj.group('url'), 'HuffPost')
1253
1254         # Look for embed.ly
1255         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1256         if mobj is not None:
1257             return self.url_result(mobj.group('url'))
1258         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1259         if mobj is not None:
1260             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1261
1262         # Look for funnyordie embed
1263         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1264         if matches:
1265             return _playlist_from_matches(
1266                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1267
1268         # Look for BBC iPlayer embed
1269         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1270         if matches:
1271             return _playlist_from_matches(matches, ie='BBCCoUk')
1272
1273         # Look for embedded RUTV player
1274         rutv_url = RUTVIE._extract_url(webpage)
1275         if rutv_url:
1276             return self.url_result(rutv_url, 'RUTV')
1277
1278         # Look for embedded SportBox player
1279         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1280         if sportbox_urls:
1281             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1282
1283         # Look for embedded TED player
1284         mobj = re.search(
1285             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1286         if mobj is not None:
1287             return self.url_result(mobj.group('url'), 'TED')
1288
1289         # Look for embedded Ustream videos
1290         mobj = re.search(
1291             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1292         if mobj is not None:
1293             return self.url_result(mobj.group('url'), 'Ustream')
1294
1295         # Look for embedded arte.tv player
1296         mobj = re.search(
1297             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1298             webpage)
1299         if mobj is not None:
1300             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1301
1302         # Look for embedded smotri.com player
1303         smotri_url = SmotriIE._extract_url(webpage)
1304         if smotri_url:
1305             return self.url_result(smotri_url, 'Smotri')
1306
1307         # Look for embedded soundcloud player
1308         mobj = re.search(
1309             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1310             webpage)
1311         if mobj is not None:
1312             url = unescapeHTML(mobj.group('url'))
1313             return self.url_result(url)
1314
1315         # Look for embedded vulture.com player
1316         mobj = re.search(
1317             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1318             webpage)
1319         if mobj is not None:
1320             url = unescapeHTML(mobj.group('url'))
1321             return self.url_result(url, ie='Vulture')
1322
1323         # Look for embedded mtvservices player
1324         mobj = re.search(
1325             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1326             webpage)
1327         if mobj is not None:
1328             url = unescapeHTML(mobj.group('url'))
1329             return self.url_result(url, ie='MTVServicesEmbedded')
1330
1331         # Look for embedded yahoo player
1332         mobj = re.search(
1333             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1334             webpage)
1335         if mobj is not None:
1336             return self.url_result(mobj.group('url'), 'Yahoo')
1337
1338         # Look for embedded sbs.com.au player
1339         mobj = re.search(
1340             r'''(?x)
1341             (?:
1342                 <meta\s+property="og:video"\s+content=|
1343                 <iframe[^>]+?src=
1344             )
1345             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1346             webpage)
1347         if mobj is not None:
1348             return self.url_result(mobj.group('url'), 'SBS')
1349
1350         # Look for embedded Cinchcast player
1351         mobj = re.search(
1352             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1353             webpage)
1354         if mobj is not None:
1355             return self.url_result(mobj.group('url'), 'Cinchcast')
1356
1357         mobj = re.search(
1358             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1359             webpage)
1360         if not mobj:
1361             mobj = re.search(
1362                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1363                 webpage)
1364         if mobj is not None:
1365             return self.url_result(mobj.group('url'), 'MLB')
1366
1367         mobj = re.search(
1368             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1369             webpage)
1370         if mobj is not None:
1371             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1372
1373         mobj = re.search(
1374             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1375             webpage)
1376         if mobj is not None:
1377             return self.url_result(mobj.group('url'), 'Livestream')
1378
1379         # Look for Zapiks embed
1380         mobj = re.search(
1381             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1382         if mobj is not None:
1383             return self.url_result(mobj.group('url'), 'Zapiks')
1384
1385         # Look for Kaltura embeds
1386         mobj = re.search(
1387             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1388         if mobj is not None:
1389             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1390
1391         # Look for Eagle.Platform embeds
1392         mobj = re.search(
1393             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1394         if mobj is not None:
1395             return self.url_result(mobj.group('url'), 'EaglePlatform')
1396
1397         # Look for ClipYou (uses Eagle.Platform) embeds
1398         mobj = re.search(
1399             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1400         if mobj is not None:
1401             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1402
1403         # Look for Pladform embeds
1404         mobj = re.search(
1405             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1406         if mobj is not None:
1407             return self.url_result(mobj.group('url'), 'Pladform')
1408
1409         # Look for Playwire embeds
1410         mobj = re.search(
1411             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1412         if mobj is not None:
1413             return self.url_result(mobj.group('url'))
1414
1415         # Look for 5min embeds
1416         mobj = re.search(
1417             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1418         if mobj is not None:
1419             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1420
1421         # Look for Crooks and Liars embeds
1422         mobj = re.search(
1423             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1424         if mobj is not None:
1425             return self.url_result(mobj.group('url'))
1426
1427         # Look for NBC Sports VPlayer embeds
1428         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1429         if nbc_sports_url:
1430             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1431
1432         # Look for UDN embeds
1433         mobj = re.search(
1434             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1435         if mobj is not None:
1436             return self.url_result(
1437                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1438
1439         # Look for Senate ISVP iframe
1440         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1441         if senate_isvp_url:
1442             return self.url_result(senate_isvp_url, 'SenateISVP')
1443
1444         def check_video(vurl):
1445             if YoutubeIE.suitable(vurl):
1446                 return True
1447             vpath = compat_urlparse.urlparse(vurl).path
1448             vext = determine_ext(vpath)
1449             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1450
1451         def filter_video(urls):
1452             return list(filter(check_video, urls))
1453
1454         # Start with something easy: JW Player in SWFObject
1455         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1456         if not found:
1457             # Look for gorilla-vid style embedding
1458             found = filter_video(re.findall(r'''(?sx)
1459                 (?:
1460                     jw_plugins|
1461                     JWPlayerOptions|
1462                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1463                 )
1464                 .*?
1465                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1466         if not found:
1467             # Broaden the search a little bit
1468             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1469         if not found:
1470             # Broaden the findall a little bit: JWPlayer JS loader
1471             found = filter_video(re.findall(
1472                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1473         if not found:
1474             # Flow player
1475             found = filter_video(re.findall(r'''(?xs)
1476                 flowplayer\("[^"]+",\s*
1477                     \{[^}]+?\}\s*,
1478                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1479                         ["']?url["']?\s*:\s*["']([^"']+)["']
1480             ''', webpage))
1481         if not found:
1482             # Cinerama player
1483             found = re.findall(
1484                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1485         if not found:
1486             # Try to find twitter cards info
1487             found = filter_video(re.findall(
1488                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1489         if not found:
1490             # We look for Open Graph info:
1491             # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
1492             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1493             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player (NOTE: re.findall returns a list, never None, so the check below is always true):
1494             if m_video_type is not None:
1495                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1496         if not found:
1497             # HTML5 video
1498             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1499         if not found:
1500             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1501             found = re.search(
1502                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1503                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1504                 webpage)
1505             if not found:
1506                 # Look also in Refresh HTTP header
1507                 refresh_header = head_response.headers.get('Refresh')
1508                 if refresh_header:
1509                     found = re.search(REDIRECT_REGEX, refresh_header)
1510             if found:
1511                 new_url = compat_urlparse.urljoin(url, found.group(1))
1512                 self.report_following_redirect(new_url)
1513                 return {
1514                     '_type': 'url',
1515                     'url': new_url,
1516                 }
1517         if not found:
1518             raise UnsupportedError(url)
1519
1520         entries = []
1521         for video_url in found:
1522             video_url = compat_urlparse.urljoin(url, video_url)
1523             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1524
1525             # Sometimes, jwplayer extraction will result in a YouTube URL
1526             if YoutubeIE.suitable(video_url):
1527                 entries.append(self.url_result(video_url, 'Youtube'))
1528                 continue
1529
1530             # Strip the file extension so that only the base name is used as the video id:
1531             video_id = os.path.splitext(video_id)[0]
1532
1533             if determine_ext(video_url) == 'smil':
1534                 entries.append({
1535                     'id': video_id,
1536                     'formats': self._extract_smil_formats(video_url, video_id),
1537                     'uploader': video_uploader,
1538                     'title': video_title,
1539                     'age_limit': age_limit,
1540                 })
1541             else:
1542                 entries.append({
1543                     'id': video_id,
1544                     'url': video_url,
1545                     'uploader': video_uploader,
1546                     'title': video_title,
1547                     'age_limit': age_limit,
1548                 })
1549
1550         if len(entries) == 1:
1551             return entries[0]
1552         else:
1553             for num, e in enumerate(entries, start=1):
1554                 # 'url' results don't have a title
1555                 if e.get('title') is not None:
1556                     e['title'] = '%s (%d)' % (e['title'], num)
1557             return {
1558                 '_type': 'playlist',
1559                 'entries': entries,
1560             }