[extractor/generic] Add test for svt embed
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urlparse,
13     compat_xml_parse_error,
14 )
15 from ..utils import (
16     determine_ext,
17     ExtractorError,
18     float_or_none,
19     HEADRequest,
20     is_html,
21     orderedSet,
22     parse_xml,
23     smuggle_url,
24     unescapeHTML,
25     unified_strdate,
26     unsmuggle_url,
27     UnsupportedError,
28     url_basename,
29     xpath_text,
30 )
31 from .brightcove import BrightcoveIE
32 from .nbc import NBCSportsVPlayerIE
33 from .ooyala import OoyalaIE
34 from .rutv import RUTVIE
35 from .smotri import SmotriIE
36 from .condenast import CondeNastIE
37 from .udn import UDNEmbedIE
38 from .senateisvp import SenateISVPIE
39 from .bliptv import BlipTVIE
40 from .svt import SVTIE
41
42
43 class GenericIE(InfoExtractor):
44     IE_DESC = 'Generic downloader that works on some sites'
45     _VALID_URL = r'.*'
46     IE_NAME = 'generic'
47     _TESTS = [
48         {
49             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
50             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
51             'info_dict': {
52                 'id': '13601338388002',
53                 'ext': 'mp4',
54                 'uploader': 'www.hodiho.fr',
55                 'title': 'R\u00e9gis plante sa Jeep',
56             }
57         },
58         # bandcamp page with custom domain
59         {
60             'add_ie': ['Bandcamp'],
61             'url': 'http://bronyrock.com/track/the-pony-mash',
62             'info_dict': {
63                 'id': '3235767654',
64                 'ext': 'mp3',
65                 'title': 'The Pony Mash',
66                 'uploader': 'M_Pallante',
67             },
68             'skip': 'There is a limit of 200 free downloads / month for the test song',
69         },
70         # embedded brightcove video
71         # it also tests brightcove videos that need to set the 'Referer' in the
72         # http requests
73         {
74             'add_ie': ['Brightcove'],
75             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
76             'info_dict': {
77                 'id': '2765128793001',
78                 'ext': 'mp4',
79                 'title': 'Le cours de bourse : l’analyse technique',
80                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
81                 'uploader': 'BFM BUSINESS',
82             },
83             'params': {
84                 'skip_download': True,
85             },
86         },
87         {
88             # https://github.com/rg3/youtube-dl/issues/2253
89             'url': 'http://bcove.me/i6nfkrc3',
90             'md5': '0ba9446db037002366bab3b3eb30c88c',
91             'info_dict': {
92                 'id': '3101154703001',
93                 'ext': 'mp4',
94                 'title': 'Still no power',
95                 'uploader': 'thestar.com',
96                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
97             },
98             'add_ie': ['Brightcove'],
99         },
100         {
101             'url': 'http://www.championat.com/video/football/v/87/87499.html',
102             'md5': 'fb973ecf6e4a78a67453647444222983',
103             'info_dict': {
104                 'id': '3414141473001',
105                 'ext': 'mp4',
106                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
107                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
108                 'uploader': 'Championat',
109             },
110         },
111         {
112             # https://github.com/rg3/youtube-dl/issues/3541
113             'add_ie': ['Brightcove'],
114             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
115             'info_dict': {
116                 'id': '3866516442001',
117                 'ext': 'mp4',
118                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
119                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
120                 'uploader': 'SBS Broadcasting',
121             },
122             'skip': 'Restricted to Netherlands',
123             'params': {
124                 'skip_download': True,  # m3u8 download
125             },
126         },
127         # Direct link to a video
128         {
129             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
130             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
131             'info_dict': {
132                 'id': 'trailer',
133                 'ext': 'mp4',
134                 'title': 'trailer',
135                 'upload_date': '20100513',
136             }
137         },
138         # ooyala video
139         {
140             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
141             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
142             'info_dict': {
143                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
144                 'ext': 'mp4',
145                 'title': '2cc213299525360.mov',  # that's what we get
146             },
147             'add_ie': ['Ooyala'],
148         },
149         # multiple ooyala embeds on SBN network websites
150         {
151             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
152             'info_dict': {
153                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
154                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
155             },
156             'playlist_mincount': 3,
157             'params': {
158                 'skip_download': True,
159             },
160             'add_ie': ['Ooyala'],
161         },
162         # google redirect
163         {
164             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
165             'info_dict': {
166                 'id': 'cmQHVoWB5FY',
167                 'ext': 'mp4',
168                 'upload_date': '20130224',
169                 'uploader_id': 'TheVerge',
170                 'description': 're:^Chris Ziegler takes a look at the\.*',
171                 'uploader': 'The Verge',
172                 'title': 'First Firefox OS phones side-by-side',
173             },
174             'params': {
175                 'skip_download': False,
176             }
177         },
178         # embed.ly video
179         {
180             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
181             'info_dict': {
182                 'id': '9ODmcdjQcHQ',
183                 'ext': 'mp4',
184                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
185                 'upload_date': '20140225',
186                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
187                 'uploader': 'Tested',
188                 'uploader_id': 'testedcom',
189             },
190             # No need to test YoutubeIE here
191             'params': {
192                 'skip_download': True,
193             },
194         },
195         # funnyordie embed
196         {
197             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
198             'info_dict': {
199                 'id': '18e820ec3f',
200                 'ext': 'mp4',
201                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
202                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
203             },
204         },
205         # BBC iPlayer embeds
206         {
207             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
208             'info_dict': {
209                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
210             },
211             'playlist_mincount': 18,
212         },
213         # RUTV embed
214         {
215             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
216             'info_dict': {
217                 'id': '776940',
218                 'ext': 'mp4',
219                 'title': 'Охотское море стало целиком российским',
220                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
221             },
222             'params': {
223                 # m3u8 download
224                 'skip_download': True,
225             },
226         },
227         # Embedded TED video
228         {
229             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
230             'md5': '65fdff94098e4a607385a60c5177c638',
231             'info_dict': {
232                 'id': '1969',
233                 'ext': 'mp4',
234                 'title': 'Hidden miracles of the natural world',
235                 'uploader': 'Louie Schwartzberg',
236                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
237             }
238         },
239         # Embedded Ustream video
240         {
241             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
242             'md5': '27b99cdb639c9b12a79bca876a073417',
243             'info_dict': {
244                 'id': '45734260',
245                 'ext': 'flv',
246                 'uploader': 'AU SPA:  The NSA and Privacy',
247                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
248             }
249         },
250         # nowvideo embed hidden behind percent encoding
251         {
252             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
253             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
254             'info_dict': {
255                 'id': '06e53103ca9aa',
256                 'ext': 'flv',
257                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
258                 'description': 'No description',
259             },
260         },
261         # arte embed
262         {
263             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
264             'md5': '7653032cbb25bf6c80d80f217055fa43',
265             'info_dict': {
266                 'id': '048195-004_PLUS7-F',
267                 'ext': 'flv',
268                 'title': 'X:enius',
269                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
270                 'upload_date': '20140320',
271             },
272             'params': {
273                 'skip_download': 'Requires rtmpdump'
274             }
275         },
276         # Condé Nast embed
277         {
278             'url': 'http://www.wired.com/2014/04/honda-asimo/',
279             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
280             'info_dict': {
281                 'id': '53501be369702d3275860000',
282                 'ext': 'mp4',
283                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
284             }
285         },
286         # Dailymotion embed
287         {
288             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
289             'md5': '441aeeb82eb72c422c7f14ec533999cd',
290             'info_dict': {
291                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
292                 'ext': 'mp4',
293                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
294                 'uploader': 'Spi0n',
295             },
296             'add_ie': ['Dailymotion'],
297         },
298         # YouTube embed
299         {
300             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
301             'info_dict': {
302                 'id': 'FXRb4ykk4S0',
303                 'ext': 'mp4',
304                 'title': 'The NBL Auction 2014',
305                 'uploader': 'BADMINTON England',
306                 'uploader_id': 'BADMINTONEvents',
307                 'upload_date': '20140603',
308                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
309             },
310             'add_ie': ['Youtube'],
311             'params': {
312                 'skip_download': True,
313             }
314         },
315         # MTVServices embed
316         {
317             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
318             'md5': '35727f82f58c76d996fc188f9755b0d5',
319             'info_dict': {
320                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
321                 'ext': 'mp4',
322                 'title': 'Review',
323                 'description': 'Mario\'s life in the fast lane has never looked so good.',
324             },
325         },
326         # YouTube embed via <data-embed-url="">
327         {
328             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
329             'info_dict': {
330                 'id': '4vAffPZIT44',
331                 'ext': 'mp4',
332                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
333                 'uploader': 'Gameloft',
334                 'uploader_id': 'gameloft',
335                 'upload_date': '20140828',
336                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
337             },
338             'params': {
339                 'skip_download': True,
340             }
341         },
342         # Camtasia studio
343         {
344             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
345             'playlist': [{
346                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
347                 'info_dict': {
348                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
349                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
350                     'ext': 'flv',
351                     'duration': 2235.90,
352                 }
353             }, {
354                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
355                 'info_dict': {
356                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
357                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
358                     'ext': 'flv',
359                     'duration': 2235.93,
360                 }
361             }],
362             'info_dict': {
363                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
364             }
365         },
366         # Flowplayer
367         {
368             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
369             'md5': '9d65602bf31c6e20014319c7d07fba27',
370             'info_dict': {
371                 'id': '5123ea6d5e5a7',
372                 'ext': 'mp4',
373                 'age_limit': 18,
374                 'uploader': 'www.handjobhub.com',
375                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
376             }
377         },
378         # RSS feed
379         {
380             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
381             'info_dict': {
382                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
383                 'title': 'Zero Punctuation',
384                 'description': 're:.*groundbreaking video review series.*'
385             },
386             'playlist_mincount': 11,
387         },
388         # Multiple brightcove videos
389         # https://github.com/rg3/youtube-dl/issues/2283
390         {
391             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
392             'info_dict': {
393                 'id': 'always-never',
394                 'title': 'Always / Never - The New Yorker',
395             },
396             'playlist_count': 3,
397             'params': {
398                 'extract_flat': False,
399                 'skip_download': True,
400             }
401         },
402         # MLB embed
403         {
404             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
405             'md5': '96f09a37e44da40dd083e12d9a683327',
406             'info_dict': {
407                 'id': '33322633',
408                 'ext': 'mp4',
409                 'title': 'Ump changes call to ball',
410                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
411                 'duration': 48,
412                 'timestamp': 1401537900,
413                 'upload_date': '20140531',
414                 'thumbnail': 're:^https?://.*\.jpg$',
415             },
416         },
417         # MLB articles
418         {
419             'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
420             'md5': 'b190e70141fb9a1552a85426b4da1b5d',
421             'info_dict': {
422                 'id': '75609783',
423                 'ext': 'mp4',
424                 'title': 'Must C: Pillar climbs for catch',
425                 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
426                 'timestamp': 1429124820,
427                 'upload_date': '20150415',
428             }
429         },
430         # Wistia embed
431         {
432             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
433             'md5': '8788b683c777a5cf25621eaf286d0c23',
434             'info_dict': {
435                 'id': '1cfaf6b7ea',
436                 'ext': 'mov',
437                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
438                 'duration': 643.0,
439                 'filesize': 182808282,
440                 'uploader': 'education-portal.com',
441             },
442         },
443         {
444             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
445             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
446             'info_dict': {
447                 'id': 'uxjb0lwrcz',
448                 'ext': 'mp4',
449                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
450                 'duration': 1715.0,
451                 'uploader': 'thoughtworks.wistia.com',
452             },
453         },
454         # Direct download with broken HEAD
455         {
456             'url': 'http://ai-radio.org:8000/radio.opus',
457             'info_dict': {
458                 'id': 'radio',
459                 'ext': 'opus',
460                 'title': 'radio',
461             },
462             'params': {
463                 'skip_download': True,  # infinite live stream
464             },
465             'expected_warnings': [
466                 r'501.*Not Implemented'
467             ],
468         },
469         # Soundcloud embed
470         {
471             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
472             'info_dict': {
473                 'id': '174391317',
474                 'ext': 'mp3',
475                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
476                 'uploader': 'Sophos Security',
477                 'title': 'Chet Chat 171 - Oct 29, 2014',
478                 'upload_date': '20141029',
479             }
480         },
481         # Livestream embed
482         {
483             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
484             'info_dict': {
485                 'id': '67864563',
486                 'ext': 'flv',
487                 'upload_date': '20141112',
488                 'title': 'Rosetta #CometLanding webcast HL 10',
489             }
490         },
491         # LazyYT
492         {
493             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
494             'info_dict': {
495                 'id': '1986',
496                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
497             },
498             'playlist_mincount': 2,
499         },
500         # Direct link with incorrect MIME type
501         {
502             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
503             'md5': '4ccbebe5f36706d85221f204d7eb5913',
504             'info_dict': {
505                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
506                 'id': '5_Lennart_Poettering_-_Systemd',
507                 'ext': 'webm',
508                 'title': '5_Lennart_Poettering_-_Systemd',
509                 'upload_date': '20141120',
510             },
511             'expected_warnings': [
512                 'URL could be a direct video link, returning it as such.'
513             ]
514         },
515         # Cinchcast embed
516         {
517             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
518             'info_dict': {
519                 'id': '7141703',
520                 'ext': 'mp3',
521                 'upload_date': '20141126',
522                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
523             }
524         },
525         # Cinerama player
526         {
527             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
528             'info_dict': {
529                 'id': '730m_DandD_1901_512k',
530                 'ext': 'mp4',
531                 'uploader': 'www.abc.net.au',
532                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
533             }
534         },
535         # embedded viddler video
536         {
537             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
538             'info_dict': {
539                 'id': '4d03aad9',
540                 'ext': 'mp4',
541                 'uploader': 'deadspin',
542                 'title': 'WALL-TO-GORTAT',
543                 'timestamp': 1422285291,
544                 'upload_date': '20150126',
545             },
546             'add_ie': ['Viddler'],
547         },
548         # Libsyn embed
549         {
550             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
551             'info_dict': {
552                 'id': '3377616',
553                 'ext': 'mp3',
554                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
555                 'description': 'md5:601cb790edd05908957dae8aaa866465',
556                 'upload_date': '20150220',
557             },
558         },
559         # jwplayer YouTube
560         {
561             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
562             'info_dict': {
563                 'id': 'Mrj4DVp2zeA',
564                 'ext': 'mp4',
565                 'upload_date': '20150212',
566                 'uploader': 'The National Archives UK',
567                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
568                 'uploader_id': 'NationalArchives08',
569                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
570             },
571         },
572         # rtl.nl embed
573         {
574             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
575             'playlist_mincount': 5,
576             'info_dict': {
577                 'id': 'aanslagen-kopenhagen',
578                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
579             }
580         },
581         # Zapiks embed
582         {
583             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
584             'info_dict': {
585                 'id': '118046',
586                 'ext': 'mp4',
587                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
588             }
589         },
590         # Kaltura embed
591         {
592             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
593             'info_dict': {
594                 'id': '1_eergr3h1',
595                 'ext': 'mp4',
596                 'upload_date': '20150226',
597                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
598                 'timestamp': int,
599                 'title': 'John Carlson Postgame 2/25/15',
600             },
601         },
602         # Eagle.Platform embed (generic URL)
603         {
604             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
605             'info_dict': {
606                 'id': '227304',
607                 'ext': 'mp4',
608                 'title': 'Навальный вышел на свободу',
609                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
610                 'thumbnail': 're:^https?://.*\.jpg$',
611                 'duration': 87,
612                 'view_count': int,
613                 'age_limit': 0,
614             },
615         },
616         # ClipYou (Eagle.Platform) embed (custom URL)
617         {
618             'url': 'http://muz-tv.ru/play/7129/',
619             'info_dict': {
620                 'id': '12820',
621                 'ext': 'mp4',
622                 'title': "'O Sole Mio",
623                 'thumbnail': 're:^https?://.*\.jpg$',
624                 'duration': 216,
625                 'view_count': int,
626             },
627         },
628         # Pladform embed
629         {
630             'url': 'http://muz-tv.ru/kinozal/view/7400/',
631             'info_dict': {
632                 'id': '100183293',
633                 'ext': 'mp4',
634                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
635                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
636                 'thumbnail': 're:^https?://.*\.jpg$',
637                 'duration': 694,
638                 'age_limit': 0,
639             },
640         },
641         # Playwire embed
642         {
643             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
644             'info_dict': {
645                 'id': '3519514',
646                 'ext': 'mp4',
647                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
648                 'thumbnail': 're:^https?://.*\.png$',
649                 'duration': 45.115,
650             },
651         },
652         # 5min embed
653         {
654             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
655             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
656             'info_dict': {
657                 'id': '518726732',
658                 'ext': 'mp4',
659                 'title': 'Facebook Creates "On This Day" | Crunch Report',
660             },
661         },
662         # SVT embed
663         {
664             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
665             'info_dict': {
666                 'id': '2900353',
667                 'ext': 'flv',
668                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
669                 'duration': 27,
670                 'age_limit': 0,
671             },
672         },
673         # RSS feed with enclosure
674         {
675             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
676             'info_dict': {
677                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
678                 'ext': 'm4v',
679                 'upload_date': '20150228',
680                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
681             }
682         },
683         # Crooks and Liars embed
684         {
685             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
686             'info_dict': {
687                 'id': '8RUoRhRi',
688                 'ext': 'mp4',
689                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
690                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
691                 'timestamp': 1428207000,
692                 'upload_date': '20150405',
693                 'uploader': 'Heather',
694             },
695         },
696         # Crooks and Liars external embed
697         {
698             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
699             'info_dict': {
700                 'id': 'MTE3MjUtMzQ2MzA',
701                 'ext': 'mp4',
702                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
703                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
704                 'timestamp': 1265032391,
705                 'upload_date': '20100201',
706                 'uploader': 'Heather',
707             },
708         },
709         # NBC Sports vplayer embed
710         {
711             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
712             'info_dict': {
713                 'id': 'ln7x1qSThw4k',
714                 'ext': 'flv',
715                 'title': "PFT Live: New leader in the 'new-look' defense",
716                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
717             },
718         },
719         # UDN embed
720         {
721             'url': 'http://www.udn.com/news/story/7314/822787',
722             'md5': 'fd2060e988c326991037b9aff9df21a6',
723             'info_dict': {
724                 'id': '300346',
725                 'ext': 'mp4',
726                 'title': '中一中男師變性 全校師生力挺',
727                 'thumbnail': 're:^https?://.*\.jpg$',
728             }
729         },
730         # Ooyala embed
731         {
732             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
733             'info_dict': {
734                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
735                 'ext': 'mp4',
736                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
737                 'title': 'This is what separates the Excel masters from the wannabes',
738             },
739             'params': {
740                 # m3u8 downloads
741                 'skip_download': True,
742             }
743         },
744         # Contains a SMIL manifest
745         {
746             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
747             'info_dict': {
748                 'id': 'file',
749                 'ext': 'flv',
750                 'title': '+ Football: Lottery Champions League Europe',
751                 'uploader': 'www.telewebion.com',
752             },
753             'params': {
754                 # rtmpe downloads
755                 'skip_download': True,
756             }
757         }
758     ]
759
760     def report_following_redirect(self, new_url):
761         """Report information extraction."""
762         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
763
764     def _extract_rss(self, url, video_id, doc):
765         playlist_title = doc.find('./channel/title').text
766         playlist_desc_el = doc.find('./channel/description')
767         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
768
769         entries = []
770         for it in doc.findall('./channel/item'):
771             next_url = xpath_text(it, 'link', fatal=False)
772             if not next_url:
773                 enclosure_nodes = it.findall('./enclosure')
774                 for e in enclosure_nodes:
775                     next_url = e.attrib.get('url')
776                     if next_url:
777                         break
778
779             if not next_url:
780                 continue
781
782             entries.append({
783                 '_type': 'url',
784                 'url': next_url,
785                 'title': it.find('title').text,
786             })
787
788         return {
789             '_type': 'playlist',
790             'id': url,
791             'title': playlist_title,
792             'description': playlist_desc,
793             'entries': entries,
794         }
795
796     def _extract_camtasia(self, url, video_id, webpage):
797         """ Returns None if no camtasia video can be found. """
798
799         camtasia_cfg = self._search_regex(
800             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
801             webpage, 'camtasia configuration file', default=None)
802         if camtasia_cfg is None:
803             return None
804
805         title = self._html_search_meta('DC.title', webpage, fatal=True)
806
807         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
808         camtasia_cfg = self._download_xml(
809             camtasia_url, video_id,
810             note='Downloading camtasia configuration',
811             errnote='Failed to download camtasia configuration')
812         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
813
814         entries = []
815         for n in fileset_node.getchildren():
816             url_n = n.find('./uri')
817             if url_n is None:
818                 continue
819
820             entries.append({
821                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
822                 'title': '%s - %s' % (title, n.tag),
823                 'url': compat_urlparse.urljoin(url, url_n.text),
824                 'duration': float_or_none(n.find('./duration').text),
825             })
826
827         return {
828             '_type': 'playlist',
829             'entries': entries,
830             'title': title,
831         }
832
833     def _real_extract(self, url):
834         if url.startswith('//'):
835             return {
836                 '_type': 'url',
837                 'url': self.http_scheme() + url,
838             }
839
840         parsed_url = compat_urlparse.urlparse(url)
841         if not parsed_url.scheme:
842             default_search = self._downloader.params.get('default_search')
843             if default_search is None:
844                 default_search = 'fixup_error'
845
846             if default_search in ('auto', 'auto_warning', 'fixup_error'):
847                 if '/' in url:
848                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
849                     return self.url_result('http://' + url)
850                 elif default_search != 'fixup_error':
851                     if default_search == 'auto_warning':
852                         if re.match(r'^(?:url|URL)$', url):
853                             raise ExtractorError(
854                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
855                                 expected=True)
856                         else:
857                             self._downloader.report_warning(
858                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
859                     return self.url_result('ytsearch:' + url)
860
861             if default_search in ('error', 'fixup_error'):
862                 raise ExtractorError(
863                     '%r is not a valid URL. '
864                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
865                     % (url, url), expected=True)
866             else:
867                 if ':' not in default_search:
868                     default_search += ':'
869                 return self.url_result(default_search + url)
870
871         url, smuggled_data = unsmuggle_url(url)
872         force_videoid = None
873         is_intentional = smuggled_data and smuggled_data.get('to_generic')
874         if smuggled_data and 'force_videoid' in smuggled_data:
875             force_videoid = smuggled_data['force_videoid']
876             video_id = force_videoid
877         else:
878             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
879
880         self.to_screen('%s: Requesting header' % video_id)
881
882         head_req = HEADRequest(url)
883         head_response = self._request_webpage(
884             head_req, video_id,
885             note=False, errnote='Could not send HEAD request to %s' % url,
886             fatal=False)
887
888         if head_response is not False:
889             # Check for redirect
890             new_url = head_response.geturl()
891             if url != new_url:
892                 self.report_following_redirect(new_url)
893                 if force_videoid:
894                     new_url = smuggle_url(
895                         new_url, {'force_videoid': force_videoid})
896                 return self.url_result(new_url)
897
898         full_response = None
899         if head_response is False:
900             full_response = self._request_webpage(url, video_id)
901             head_response = full_response
902
903         # Check for direct link to a video
904         content_type = head_response.headers.get('Content-Type', '')
905         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
906         if m:
907             upload_date = unified_strdate(
908                 head_response.headers.get('Last-Modified'))
909             return {
910                 'id': video_id,
911                 'title': os.path.splitext(url_basename(url))[0],
912                 'direct': True,
913                 'formats': [{
914                     'format_id': m.group('format_id'),
915                     'url': url,
916                     'vcodec': 'none' if m.group('type') == 'audio' else None
917                 }],
918                 'upload_date': upload_date,
919             }
920
921         if not self._downloader.params.get('test', False) and not is_intentional:
922             self._downloader.report_warning('Falling back on generic information extractor.')
923
924         if not full_response:
925             full_response = self._request_webpage(url, video_id)
926
927         # Maybe it's a direct link to a video?
928         # Be careful not to download the whole thing!
929         first_bytes = full_response.read(512)
930         if not is_html(first_bytes):
931             self._downloader.report_warning(
932                 'URL could be a direct video link, returning it as such.')
933             upload_date = unified_strdate(
934                 head_response.headers.get('Last-Modified'))
935             return {
936                 'id': video_id,
937                 'title': os.path.splitext(url_basename(url))[0],
938                 'direct': True,
939                 'url': url,
940                 'upload_date': upload_date,
941             }
942
943         webpage = self._webpage_read_content(
944             full_response, url, video_id, prefix=first_bytes)
945
946         self.report_extraction(video_id)
947
948         # Is it an RSS feed?
949         try:
950             doc = parse_xml(webpage)
951             if doc.tag == 'rss':
952                 return self._extract_rss(url, video_id, doc)
953         except compat_xml_parse_error:
954             pass
955
956         # Is it a Camtasia project?
957         camtasia_res = self._extract_camtasia(url, video_id, webpage)
958         if camtasia_res is not None:
959             return camtasia_res
960
961         # Sometimes embedded video player is hidden behind percent encoding
962         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
963         # Unescaping the whole page allows to handle those cases in a generic way
964         webpage = compat_urllib_parse.unquote(webpage)
965
966         # it's tempting to parse this further, but you would
967         # have to take into account all the variations like
968         #   Video Title - Site Name
969         #   Site Name | Video Title
970         #   Video Title - Tagline | Site Name
971         # and so on and so forth; it's just not practical
972         video_title = self._html_search_regex(
973             r'(?s)<title>(.*?)</title>', webpage, 'video title',
974             default='video')
975
976         # Try to detect age limit automatically
977         age_limit = self._rta_search(webpage)
978         # And then there are the jokers who advertise that they use RTA,
979         # but actually don't.
980         AGE_LIMIT_MARKERS = [
981             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
982         ]
983         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
984             age_limit = 18
985
986         # video uploader is domain name
987         video_uploader = self._search_regex(
988             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
989
990         # Helper method
991         def _playlist_from_matches(matches, getter=None, ie=None):
992             urlrs = orderedSet(
993                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
994                 for m in matches)
995             return self.playlist_result(
996                 urlrs, playlist_id=video_id, playlist_title=video_title)
997
998         # Look for BrightCove:
999         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1000         if bc_urls:
1001             self.to_screen('Brightcove video detected.')
1002             entries = [{
1003                 '_type': 'url',
1004                 'url': smuggle_url(bc_url, {'Referer': url}),
1005                 'ie_key': 'Brightcove'
1006             } for bc_url in bc_urls]
1007
1008             return {
1009                 '_type': 'playlist',
1010                 'title': video_title,
1011                 'id': video_id,
1012                 'entries': entries,
1013             }
1014
1015         # Look for embedded rtl.nl player
1016         matches = re.findall(
1017             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1018             webpage)
1019         if matches:
1020             return _playlist_from_matches(matches, ie='RtlNl')
1021
1022         # Look for embedded (iframe) Vimeo player
1023         mobj = re.search(
1024             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1025         if mobj:
1026             player_url = unescapeHTML(mobj.group('url'))
1027             surl = smuggle_url(player_url, {'Referer': url})
1028             return self.url_result(surl)
1029         # Look for embedded (swf embed) Vimeo player
1030         mobj = re.search(
1031             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1032         if mobj:
1033             return self.url_result(mobj.group(1))
1034
1035         # Look for embedded YouTube player
1036         matches = re.findall(r'''(?x)
1037             (?:
1038                 <iframe[^>]+?src=|
1039                 data-video-url=|
1040                 <embed[^>]+?src=|
1041                 embedSWF\(?:\s*|
1042                 new\s+SWFObject\(
1043             )
1044             (["\'])
1045                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1046                 (?:embed|v|p)/.+?)
1047             \1''', webpage)
1048         if matches:
1049             return _playlist_from_matches(
1050                 matches, lambda m: unescapeHTML(m[1]))
1051
1052         # Look for lazyYT YouTube embed
1053         matches = re.findall(
1054             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1055         if matches:
1056             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1057
1058         # Look for embedded Dailymotion player
1059         matches = re.findall(
1060             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1061         if matches:
1062             return _playlist_from_matches(
1063                 matches, lambda m: unescapeHTML(m[1]))
1064
1065         # Look for embedded Dailymotion playlist player (#3822)
1066         m = re.search(
1067             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1068         if m:
1069             playlists = re.findall(
1070                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1071             if playlists:
1072                 return _playlist_from_matches(
1073                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1074
1075         # Look for embedded Wistia player
1076         match = re.search(
1077             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1078         if match:
1079             embed_url = self._proto_relative_url(
1080                 unescapeHTML(match.group('url')))
1081             return {
1082                 '_type': 'url_transparent',
1083                 'url': embed_url,
1084                 'ie_key': 'Wistia',
1085                 'uploader': video_uploader,
1086                 'title': video_title,
1087                 'id': video_id,
1088             }
1089
1090         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1091         if match:
1092             return {
1093                 '_type': 'url_transparent',
1094                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1095                 'ie_key': 'Wistia',
1096                 'uploader': video_uploader,
1097                 'title': video_title,
1098                 'id': match.group('id')
1099             }
1100
1101         # Look for embedded blip.tv player
1102         bliptv_url = BlipTVIE._extract_url(webpage)
1103         if bliptv_url:
1104             return self.url_result(bliptv_url, 'BlipTV')
1105
1106         # Look for SVT player
1107         svt_url = SVTIE._extract_url(webpage)
1108         if svt_url:
1109             return self.url_result(svt_url, 'SVT')
1110
1111         # Look for embedded condenast player
1112         matches = re.findall(
1113             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1114             webpage)
1115         if matches:
1116             return {
1117                 '_type': 'playlist',
1118                 'entries': [{
1119                     '_type': 'url',
1120                     'ie_key': 'CondeNast',
1121                     'url': ma,
1122                 } for ma in matches],
1123                 'title': video_title,
1124                 'id': video_id,
1125             }
1126
1127         # Look for Bandcamp pages with custom domain
1128         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1129         if mobj is not None:
1130             burl = unescapeHTML(mobj.group(1))
1131             # Don't set the extractor because it can be a track url or an album
1132             return self.url_result(burl)
1133
1134         # Look for embedded Vevo player
1135         mobj = re.search(
1136             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1137         if mobj is not None:
1138             return self.url_result(mobj.group('url'))
1139
1140         # Look for embedded Viddler player
1141         mobj = re.search(
1142             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1143             webpage)
1144         if mobj is not None:
1145             return self.url_result(mobj.group('url'))
1146
1147         # Look for NYTimes player
1148         mobj = re.search(
1149             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1150             webpage)
1151         if mobj is not None:
1152             return self.url_result(mobj.group('url'))
1153
1154         # Look for Libsyn player
1155         mobj = re.search(
1156             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1157         if mobj is not None:
1158             return self.url_result(mobj.group('url'))
1159
1160         # Look for Ooyala videos
1161         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1162                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1163                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1164                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1165         if mobj is not None:
1166             return OoyalaIE._build_url_result(mobj.group('ec'))
1167
1168         # Look for multiple Ooyala embeds on SBN network websites
1169         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1170         if mobj is not None:
1171             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1172             if embeds:
1173                 return _playlist_from_matches(
1174                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1175
1176         # Look for Aparat videos
1177         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1178         if mobj is not None:
1179             return self.url_result(mobj.group(1), 'Aparat')
1180
1181         # Look for MPORA videos
1182         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1183         if mobj is not None:
1184             return self.url_result(mobj.group(1), 'Mpora')
1185
1186         # Look for embedded NovaMov-based player
1187         mobj = re.search(
1188             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1189                     (?P<url>http://(?:(?:embed|www)\.)?
1190                         (?:novamov\.com|
1191                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1192                            videoweed\.(?:es|com)|
1193                            movshare\.(?:net|sx|ag)|
1194                            divxstage\.(?:eu|net|ch|co|at|ag))
1195                         /embed\.php.+?)\1''', webpage)
1196         if mobj is not None:
1197             return self.url_result(mobj.group('url'))
1198
1199         # Look for embedded Facebook player
1200         mobj = re.search(
1201             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1202         if mobj is not None:
1203             return self.url_result(mobj.group('url'), 'Facebook')
1204
1205         # Look for embedded VK player
1206         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1207         if mobj is not None:
1208             return self.url_result(mobj.group('url'), 'VK')
1209
1210         # Look for embedded ivi player
1211         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1212         if mobj is not None:
1213             return self.url_result(mobj.group('url'), 'Ivi')
1214
1215         # Look for embedded Huffington Post player
1216         mobj = re.search(
1217             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1218         if mobj is not None:
1219             return self.url_result(mobj.group('url'), 'HuffPost')
1220
1221         # Look for embed.ly
1222         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1223         if mobj is not None:
1224             return self.url_result(mobj.group('url'))
1225         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1226         if mobj is not None:
1227             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1228
1229         # Look for funnyordie embed
1230         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1231         if matches:
1232             return _playlist_from_matches(
1233                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1234
1235         # Look for BBC iPlayer embed
1236         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1237         if matches:
1238             return _playlist_from_matches(matches, ie='BBCCoUk')
1239
1240         # Look for embedded RUTV player
1241         rutv_url = RUTVIE._extract_url(webpage)
1242         if rutv_url:
1243             return self.url_result(rutv_url, 'RUTV')
1244
1245         # Look for embedded TED player
1246         mobj = re.search(
1247             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1248         if mobj is not None:
1249             return self.url_result(mobj.group('url'), 'TED')
1250
1251         # Look for embedded Ustream videos
1252         mobj = re.search(
1253             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1254         if mobj is not None:
1255             return self.url_result(mobj.group('url'), 'Ustream')
1256
1257         # Look for embedded arte.tv player
1258         mobj = re.search(
1259             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1260             webpage)
1261         if mobj is not None:
1262             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1263
1264         # Look for embedded smotri.com player
1265         smotri_url = SmotriIE._extract_url(webpage)
1266         if smotri_url:
1267             return self.url_result(smotri_url, 'Smotri')
1268
1269         # Look for embeded soundcloud player
1270         mobj = re.search(
1271             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1272             webpage)
1273         if mobj is not None:
1274             url = unescapeHTML(mobj.group('url'))
1275             return self.url_result(url)
1276
1277         # Look for embedded vulture.com player
1278         mobj = re.search(
1279             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1280             webpage)
1281         if mobj is not None:
1282             url = unescapeHTML(mobj.group('url'))
1283             return self.url_result(url, ie='Vulture')
1284
1285         # Look for embedded mtvservices player
1286         mobj = re.search(
1287             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1288             webpage)
1289         if mobj is not None:
1290             url = unescapeHTML(mobj.group('url'))
1291             return self.url_result(url, ie='MTVServicesEmbedded')
1292
1293         # Look for embedded yahoo player
1294         mobj = re.search(
1295             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1296             webpage)
1297         if mobj is not None:
1298             return self.url_result(mobj.group('url'), 'Yahoo')
1299
1300         # Look for embedded sbs.com.au player
1301         mobj = re.search(
1302             r'''(?x)
1303             (?:
1304                 <meta\s+property="og:video"\s+content=|
1305                 <iframe[^>]+?src=
1306             )
1307             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1308             webpage)
1309         if mobj is not None:
1310             return self.url_result(mobj.group('url'), 'SBS')
1311
1312         # Look for embedded Cinchcast player
1313         mobj = re.search(
1314             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1315             webpage)
1316         if mobj is not None:
1317             return self.url_result(mobj.group('url'), 'Cinchcast')
1318
1319         mobj = re.search(
1320             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1321             webpage)
1322         if not mobj:
1323             mobj = re.search(
1324                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1325                 webpage)
1326         if mobj is not None:
1327             return self.url_result(mobj.group('url'), 'MLB')
1328
1329         mobj = re.search(
1330             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1331             webpage)
1332         if mobj is not None:
1333             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1334
1335         mobj = re.search(
1336             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1337             webpage)
1338         if mobj is not None:
1339             return self.url_result(mobj.group('url'), 'Livestream')
1340
1341         # Look for Zapiks embed
1342         mobj = re.search(
1343             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1344         if mobj is not None:
1345             return self.url_result(mobj.group('url'), 'Zapiks')
1346
1347         # Look for Kaltura embeds
1348         mobj = re.search(
1349             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1350         if mobj is not None:
1351             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1352
1353         # Look for Eagle.Platform embeds
1354         mobj = re.search(
1355             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1356         if mobj is not None:
1357             return self.url_result(mobj.group('url'), 'EaglePlatform')
1358
1359         # Look for ClipYou (uses Eagle.Platform) embeds
1360         mobj = re.search(
1361             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1362         if mobj is not None:
1363             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1364
1365         # Look for Pladform embeds
1366         mobj = re.search(
1367             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1368         if mobj is not None:
1369             return self.url_result(mobj.group('url'), 'Pladform')
1370
1371         # Look for Playwire embeds
1372         mobj = re.search(
1373             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1374         if mobj is not None:
1375             return self.url_result(mobj.group('url'))
1376
1377         # Look for 5min embeds
1378         mobj = re.search(
1379             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1380         if mobj is not None:
1381             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1382
1383         # Look for Crooks and Liars embeds
1384         mobj = re.search(
1385             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1386         if mobj is not None:
1387             return self.url_result(mobj.group('url'))
1388
1389         # Look for NBC Sports VPlayer embeds
1390         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1391         if nbc_sports_url:
1392             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1393
1394         # Look for UDN embeds
1395         mobj = re.search(
1396             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1397         if mobj is not None:
1398             return self.url_result(
1399                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1400
1401         # Look for Senate ISVP iframe
1402         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1403         if senate_isvp_url:
1404             return self.url_result(surl, 'SenateISVP')
1405
1406         def check_video(vurl):
1407             if YoutubeIE.suitable(vurl):
1408                 return True
1409             vpath = compat_urlparse.urlparse(vurl).path
1410             vext = determine_ext(vpath)
1411             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1412
1413         def filter_video(urls):
1414             return list(filter(check_video, urls))
1415
1416         # Start with something easy: JW Player in SWFObject
1417         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1418         if not found:
1419             # Look for gorilla-vid style embedding
1420             found = filter_video(re.findall(r'''(?sx)
1421                 (?:
1422                     jw_plugins|
1423                     JWPlayerOptions|
1424                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1425                 )
1426                 .*?
1427                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1428         if not found:
1429             # Broaden the search a little bit
1430             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1431         if not found:
1432             # Broaden the findall a little bit: JWPlayer JS loader
1433             found = filter_video(re.findall(
1434                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1435         if not found:
1436             # Flow player
1437             found = filter_video(re.findall(r'''(?xs)
1438                 flowplayer\("[^"]+",\s*
1439                     \{[^}]+?\}\s*,
1440                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1441                         ["']?url["']?\s*:\s*["']([^"']+)["']
1442             ''', webpage))
1443         if not found:
1444             # Cinerama player
1445             found = re.findall(
1446                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1447         if not found:
1448             # Try to find twitter cards info
1449             found = filter_video(re.findall(
1450                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1451         if not found:
1452             # We look for Open Graph info:
1453             # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1454             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1455             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1456             if m_video_type is not None:
1457                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1458         if not found:
1459             # HTML5 video
1460             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1461         if not found:
1462             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1463             found = re.search(
1464                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1465                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1466                 webpage)
1467             if not found:
1468                 # Look also in Refresh HTTP header
1469                 refresh_header = head_response.headers.get('Refresh')
1470                 if refresh_header:
1471                     found = re.search(REDIRECT_REGEX, refresh_header)
1472             if found:
1473                 new_url = compat_urlparse.urljoin(url, found.group(1))
1474                 self.report_following_redirect(new_url)
1475                 return {
1476                     '_type': 'url',
1477                     'url': new_url,
1478                 }
1479         if not found:
1480             raise UnsupportedError(url)
1481
1482         entries = []
1483         for video_url in found:
1484             video_url = compat_urlparse.urljoin(url, video_url)
1485             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1486
1487             # Sometimes, jwplayer extraction will result in a YouTube URL
1488             if YoutubeIE.suitable(video_url):
1489                 entries.append(self.url_result(video_url, 'Youtube'))
1490                 continue
1491
1492             # Strip the file extension so that only the base name is used as the video id
1493             video_id = os.path.splitext(video_id)[0]
1494
1495             if determine_ext(video_url) == 'smil':
1496                 entries.append({
1497                     'id': video_id,
1498                     'formats': self._extract_smil_formats(video_url, video_id),
1499                     'uploader': video_uploader,
1500                     'title': video_title,
1501                     'age_limit': age_limit,
1502                 })
1503             else:
1504                 entries.append({
1505                     'id': video_id,
1506                     'url': video_url,
1507                     'uploader': video_uploader,
1508                     'title': video_title,
1509                     'age_limit': age_limit,
1510                 })
1511
1512         if len(entries) == 1:
1513             return entries[0]
1514         else:
1515             for num, e in enumerate(entries, start=1):
1516                 # 'url' results don't have a title
1517                 if e.get('title') is not None:
1518                     e['title'] = '%s (%d)' % (e['title'], num)
1519             return {
1520                 '_type': 'playlist',
1521                 'entries': entries,
1522             }