[udn] Add new extractor
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urlparse,
13     compat_xml_parse_error,
14 )
15 from ..utils import (
16     determine_ext,
17     ExtractorError,
18     float_or_none,
19     HEADRequest,
20     is_html,
21     orderedSet,
22     parse_xml,
23     smuggle_url,
24     unescapeHTML,
25     unified_strdate,
26     unsmuggle_url,
27     UnsupportedError,
28     url_basename,
29     url_infer_protocol,
30     xpath_text,
31 )
32 from .brightcove import BrightcoveIE
33 from .nbc import NBCSportsVPlayerIE
34 from .ooyala import OoyalaIE
35 from .rutv import RUTVIE
36 from .smotri import SmotriIE
37 from .condenast import CondeNastIE
38 from .udn import UDNEmbedIE
39
40
41 class GenericIE(InfoExtractor):
42     IE_DESC = 'Generic downloader that works on some sites'
43     _VALID_URL = r'.*'
44     IE_NAME = 'generic'
45     _TESTS = [
46         {
47             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
48             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
49             'info_dict': {
50                 'id': '13601338388002',
51                 'ext': 'mp4',
52                 'uploader': 'www.hodiho.fr',
53                 'title': 'R\u00e9gis plante sa Jeep',
54             }
55         },
56         # bandcamp page with custom domain
57         {
58             'add_ie': ['Bandcamp'],
59             'url': 'http://bronyrock.com/track/the-pony-mash',
60             'info_dict': {
61                 'id': '3235767654',
62                 'ext': 'mp3',
63                 'title': 'The Pony Mash',
64                 'uploader': 'M_Pallante',
65             },
66             'skip': 'There is a limit of 200 free downloads / month for the test song',
67         },
68         # embedded brightcove video
69         # it also tests brightcove videos that need to set the 'Referer' in the
70         # http requests
71         {
72             'add_ie': ['Brightcove'],
73             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
74             'info_dict': {
75                 'id': '2765128793001',
76                 'ext': 'mp4',
77                 'title': 'Le cours de bourse : l’analyse technique',
78                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
79                 'uploader': 'BFM BUSINESS',
80             },
81             'params': {
82                 'skip_download': True,
83             },
84         },
85         {
86             # https://github.com/rg3/youtube-dl/issues/2253
87             'url': 'http://bcove.me/i6nfkrc3',
88             'md5': '0ba9446db037002366bab3b3eb30c88c',
89             'info_dict': {
90                 'id': '3101154703001',
91                 'ext': 'mp4',
92                 'title': 'Still no power',
93                 'uploader': 'thestar.com',
94                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
95             },
96             'add_ie': ['Brightcove'],
97         },
98         {
99             'url': 'http://www.championat.com/video/football/v/87/87499.html',
100             'md5': 'fb973ecf6e4a78a67453647444222983',
101             'info_dict': {
102                 'id': '3414141473001',
103                 'ext': 'mp4',
104                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
105                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
106                 'uploader': 'Championat',
107             },
108         },
109         {
110             # https://github.com/rg3/youtube-dl/issues/3541
111             'add_ie': ['Brightcove'],
112             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
113             'info_dict': {
114                 'id': '3866516442001',
115                 'ext': 'mp4',
116                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
117                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
118                 'uploader': 'SBS Broadcasting',
119             },
120             'skip': 'Restricted to Netherlands',
121             'params': {
122                 'skip_download': True,  # m3u8 download
123             },
124         },
125         # Direct link to a video
126         {
127             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
128             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
129             'info_dict': {
130                 'id': 'trailer',
131                 'ext': 'mp4',
132                 'title': 'trailer',
133                 'upload_date': '20100513',
134             }
135         },
136         # ooyala video
137         {
138             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
139             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
140             'info_dict': {
141                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
142                 'ext': 'mp4',
143                 'title': '2cc213299525360.mov',  # that's what we get
144             },
145             'add_ie': ['Ooyala'],
146         },
147         # multiple ooyala embeds on SBN network websites
148         {
149             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
150             'info_dict': {
151                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
152                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
153             },
154             'playlist_mincount': 3,
155             'params': {
156                 'skip_download': True,
157             },
158             'add_ie': ['Ooyala'],
159         },
160         # google redirect
161         {
162             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
163             'info_dict': {
164                 'id': 'cmQHVoWB5FY',
165                 'ext': 'mp4',
166                 'upload_date': '20130224',
167                 'uploader_id': 'TheVerge',
168                 'description': 're:^Chris Ziegler takes a look at the\.*',
169                 'uploader': 'The Verge',
170                 'title': 'First Firefox OS phones side-by-side',
171             },
172             'params': {
173                 'skip_download': False,
174             }
175         },
176         # embed.ly video
177         {
178             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
179             'info_dict': {
180                 'id': '9ODmcdjQcHQ',
181                 'ext': 'mp4',
182                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
183                 'upload_date': '20140225',
184                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
185                 'uploader': 'Tested',
186                 'uploader_id': 'testedcom',
187             },
188             # No need to test YoutubeIE here
189             'params': {
190                 'skip_download': True,
191             },
192         },
193         # funnyordie embed
194         {
195             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
196             'info_dict': {
197                 'id': '18e820ec3f',
198                 'ext': 'mp4',
199                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
200                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
201             },
202         },
203         # BBC iPlayer embeds
204         {
205             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
206             'info_dict': {
207                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
208             },
209             'playlist_mincount': 18,
210         },
211         # RUTV embed
212         {
213             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
214             'info_dict': {
215                 'id': '776940',
216                 'ext': 'mp4',
217                 'title': 'Охотское море стало целиком российским',
218                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
219             },
220             'params': {
221                 # m3u8 download
222                 'skip_download': True,
223             },
224         },
225         # Embedded TED video
226         {
227             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
228             'md5': '65fdff94098e4a607385a60c5177c638',
229             'info_dict': {
230                 'id': '1969',
231                 'ext': 'mp4',
232                 'title': 'Hidden miracles of the natural world',
233                 'uploader': 'Louie Schwartzberg',
234                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
235             }
236         },
237         # Embedded Ustream video
238         {
239             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
240             'md5': '27b99cdb639c9b12a79bca876a073417',
241             'info_dict': {
242                 'id': '45734260',
243                 'ext': 'flv',
244                 'uploader': 'AU SPA:  The NSA and Privacy',
245                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
246             }
247         },
248         # nowvideo embed hidden behind percent encoding
249         {
250             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
251             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
252             'info_dict': {
253                 'id': '06e53103ca9aa',
254                 'ext': 'flv',
255                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
256                 'description': 'No description',
257             },
258         },
259         # arte embed
260         {
261             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
262             'md5': '7653032cbb25bf6c80d80f217055fa43',
263             'info_dict': {
264                 'id': '048195-004_PLUS7-F',
265                 'ext': 'flv',
266                 'title': 'X:enius',
267                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
268                 'upload_date': '20140320',
269             },
270             'params': {
271                 'skip_download': 'Requires rtmpdump'
272             }
273         },
274         # Condé Nast embed
275         {
276             'url': 'http://www.wired.com/2014/04/honda-asimo/',
277             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
278             'info_dict': {
279                 'id': '53501be369702d3275860000',
280                 'ext': 'mp4',
281                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
282             }
283         },
284         # Dailymotion embed
285         {
286             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
287             'md5': '441aeeb82eb72c422c7f14ec533999cd',
288             'info_dict': {
289                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
290                 'ext': 'mp4',
291                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
292                 'uploader': 'Spi0n',
293             },
294             'add_ie': ['Dailymotion'],
295         },
296         # YouTube embed
297         {
298             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
299             'info_dict': {
300                 'id': 'FXRb4ykk4S0',
301                 'ext': 'mp4',
302                 'title': 'The NBL Auction 2014',
303                 'uploader': 'BADMINTON England',
304                 'uploader_id': 'BADMINTONEvents',
305                 'upload_date': '20140603',
306                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
307             },
308             'add_ie': ['Youtube'],
309             'params': {
310                 'skip_download': True,
311             }
312         },
313         # MTVServices embed
314         {
315             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
316             'md5': '35727f82f58c76d996fc188f9755b0d5',
317             'info_dict': {
318                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
319                 'ext': 'mp4',
320                 'title': 'Review',
321                 'description': 'Mario\'s life in the fast lane has never looked so good.',
322             },
323         },
324         # YouTube embed via <data-embed-url="">
325         {
326             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
327             'info_dict': {
328                 'id': '4vAffPZIT44',
329                 'ext': 'mp4',
330                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
331                 'uploader': 'Gameloft',
332                 'uploader_id': 'gameloft',
333                 'upload_date': '20140828',
334                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
335             },
336             'params': {
337                 'skip_download': True,
338             }
339         },
340         # Camtasia studio
341         {
342             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
343             'playlist': [{
344                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
345                 'info_dict': {
346                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
347                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
348                     'ext': 'flv',
349                     'duration': 2235.90,
350                 }
351             }, {
352                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
353                 'info_dict': {
354                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
355                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
356                     'ext': 'flv',
357                     'duration': 2235.93,
358                 }
359             }],
360             'info_dict': {
361                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
362             }
363         },
364         # Flowplayer
365         {
366             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
367             'md5': '9d65602bf31c6e20014319c7d07fba27',
368             'info_dict': {
369                 'id': '5123ea6d5e5a7',
370                 'ext': 'mp4',
371                 'age_limit': 18,
372                 'uploader': 'www.handjobhub.com',
373                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
374             }
375         },
376         # RSS feed
377         {
378             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
379             'info_dict': {
380                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
381                 'title': 'Zero Punctuation',
382                 'description': 're:.*groundbreaking video review series.*'
383             },
384             'playlist_mincount': 11,
385         },
386         # Multiple brightcove videos
387         # https://github.com/rg3/youtube-dl/issues/2283
388         {
389             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
390             'info_dict': {
391                 'id': 'always-never',
392                 'title': 'Always / Never - The New Yorker',
393             },
394             'playlist_count': 3,
395             'params': {
396                 'extract_flat': False,
397                 'skip_download': True,
398             }
399         },
400         # MLB embed
401         {
402             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
403             'md5': '96f09a37e44da40dd083e12d9a683327',
404             'info_dict': {
405                 'id': '33322633',
406                 'ext': 'mp4',
407                 'title': 'Ump changes call to ball',
408                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
409                 'duration': 48,
410                 'timestamp': 1401537900,
411                 'upload_date': '20140531',
412                 'thumbnail': 're:^https?://.*\.jpg$',
413             },
414         },
415         # Wistia embed
416         {
417             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
418             'md5': '8788b683c777a5cf25621eaf286d0c23',
419             'info_dict': {
420                 'id': '1cfaf6b7ea',
421                 'ext': 'mov',
422                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
423                 'duration': 643.0,
424                 'filesize': 182808282,
425                 'uploader': 'education-portal.com',
426             },
427         },
428         {
429             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
430             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
431             'info_dict': {
432                 'id': 'uxjb0lwrcz',
433                 'ext': 'mp4',
434                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
435                 'duration': 1715.0,
436                 'uploader': 'thoughtworks.wistia.com',
437             },
438         },
439         # Direct download with broken HEAD
440         {
441             'url': 'http://ai-radio.org:8000/radio.opus',
442             'info_dict': {
443                 'id': 'radio',
444                 'ext': 'opus',
445                 'title': 'radio',
446             },
447             'params': {
448                 'skip_download': True,  # infinite live stream
449             },
450             'expected_warnings': [
451                 r'501.*Not Implemented'
452             ],
453         },
454         # Soundcloud embed
455         {
456             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
457             'info_dict': {
458                 'id': '174391317',
459                 'ext': 'mp3',
460                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
461                 'uploader': 'Sophos Security',
462                 'title': 'Chet Chat 171 - Oct 29, 2014',
463                 'upload_date': '20141029',
464             }
465         },
466         # Livestream embed
467         {
468             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
469             'info_dict': {
470                 'id': '67864563',
471                 'ext': 'flv',
472                 'upload_date': '20141112',
473                 'title': 'Rosetta #CometLanding webcast HL 10',
474             }
475         },
476         # LazyYT
477         {
478             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
479             'info_dict': {
480                 'id': '1986',
481                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
482             },
483             'playlist_mincount': 2,
484         },
485         # Direct link with incorrect MIME type
486         {
487             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
488             'md5': '4ccbebe5f36706d85221f204d7eb5913',
489             'info_dict': {
490                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
491                 'id': '5_Lennart_Poettering_-_Systemd',
492                 'ext': 'webm',
493                 'title': '5_Lennart_Poettering_-_Systemd',
494                 'upload_date': '20141120',
495             },
496             'expected_warnings': [
497                 'URL could be a direct video link, returning it as such.'
498             ]
499         },
500         # Cinchcast embed
501         {
502             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
503             'info_dict': {
504                 'id': '7141703',
505                 'ext': 'mp3',
506                 'upload_date': '20141126',
507                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
508             }
509         },
510         # Cinerama player
511         {
512             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
513             'info_dict': {
514                 'id': '730m_DandD_1901_512k',
515                 'ext': 'mp4',
516                 'uploader': 'www.abc.net.au',
517                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
518             }
519         },
520         # embedded viddler video
521         {
522             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
523             'info_dict': {
524                 'id': '4d03aad9',
525                 'ext': 'mp4',
526                 'uploader': 'deadspin',
527                 'title': 'WALL-TO-GORTAT',
528                 'timestamp': 1422285291,
529                 'upload_date': '20150126',
530             },
531             'add_ie': ['Viddler'],
532         },
533         # Libsyn embed
534         {
535             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
536             'info_dict': {
537                 'id': '3377616',
538                 'ext': 'mp3',
539                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
540                 'description': 'md5:601cb790edd05908957dae8aaa866465',
541                 'upload_date': '20150220',
542             },
543         },
544         # jwplayer YouTube
545         {
546             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
547             'info_dict': {
548                 'id': 'Mrj4DVp2zeA',
549                 'ext': 'mp4',
550                 'upload_date': '20150212',
551                 'uploader': 'The National Archives UK',
552                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
553                 'uploader_id': 'NationalArchives08',
554                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
555             },
556         },
557         # rtl.nl embed
558         {
559             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
560             'playlist_mincount': 5,
561             'info_dict': {
562                 'id': 'aanslagen-kopenhagen',
563                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
564             }
565         },
566         # Zapiks embed
567         {
568             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
569             'info_dict': {
570                 'id': '118046',
571                 'ext': 'mp4',
572                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
573             }
574         },
575         # Kaltura embed
576         {
577             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
578             'info_dict': {
579                 'id': '1_eergr3h1',
580                 'ext': 'mp4',
581                 'upload_date': '20150226',
582                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
583                 'timestamp': int,
584                 'title': 'John Carlson Postgame 2/25/15',
585             },
586         },
587         # Eagle.Platform embed (generic URL)
588         {
589             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
590             'info_dict': {
591                 'id': '227304',
592                 'ext': 'mp4',
593                 'title': 'Навальный вышел на свободу',
594                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
595                 'thumbnail': 're:^https?://.*\.jpg$',
596                 'duration': 87,
597                 'view_count': int,
598                 'age_limit': 0,
599             },
600         },
601         # ClipYou (Eagle.Platform) embed (custom URL)
602         {
603             'url': 'http://muz-tv.ru/play/7129/',
604             'info_dict': {
605                 'id': '12820',
606                 'ext': 'mp4',
607                 'title': "'O Sole Mio",
608                 'thumbnail': 're:^https?://.*\.jpg$',
609                 'duration': 216,
610                 'view_count': int,
611             },
612         },
613         # Pladform embed
614         {
615             'url': 'http://muz-tv.ru/kinozal/view/7400/',
616             'info_dict': {
617                 'id': '100183293',
618                 'ext': 'mp4',
619                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
620                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
621                 'thumbnail': 're:^https?://.*\.jpg$',
622                 'duration': 694,
623                 'age_limit': 0,
624             },
625         },
626         # 5min embed
627         {
628             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
629             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
630             'info_dict': {
631                 'id': '518726732',
632                 'ext': 'mp4',
633                 'title': 'Facebook Creates "On This Day" | Crunch Report',
634             },
635         },
636         # RSS feed with enclosure
637         {
638             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
639             'info_dict': {
640                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
641                 'ext': 'm4v',
642                 'upload_date': '20150228',
643                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
644             }
645         },
646         # NBC Sports vplayer embed
647         {
648             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
649             'info_dict': {
650                 'id': 'ln7x1qSThw4k',
651                 'ext': 'flv',
652                 'title': "PFT Live: New leader in the 'new-look' defense",
653                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
654             },
655         },
656         # UDN embed
657         {
658             'url': 'http://www.udn.com/news/story/7314/822787',
659             'md5': 'de06b4c90b042c128395a88f0384817e',
660             'info_dict': {
661                 'id': '300040',
662                 'ext': 'mp4',
663                 'title': '生物老師男變女 全校挺"做自己"',
664                 'thumbnail': 're:^https?://.*\.jpg$',
665             }
666         }
667     ]
668
669     def report_following_redirect(self, new_url):
670         """Report information extraction."""
671         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
672
673     def _extract_rss(self, url, video_id, doc):
674         playlist_title = doc.find('./channel/title').text
675         playlist_desc_el = doc.find('./channel/description')
676         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
677
678         entries = []
679         for it in doc.findall('./channel/item'):
680             next_url = xpath_text(it, 'link', fatal=False)
681             if not next_url:
682                 enclosure_nodes = it.findall('./enclosure')
683                 for e in enclosure_nodes:
684                     next_url = e.attrib.get('url')
685                     if next_url:
686                         break
687
688             if not next_url:
689                 continue
690
691             entries.append({
692                 '_type': 'url',
693                 'url': next_url,
694                 'title': it.find('title').text,
695             })
696
697         return {
698             '_type': 'playlist',
699             'id': url,
700             'title': playlist_title,
701             'description': playlist_desc,
702             'entries': entries,
703         }
704
705     def _extract_camtasia(self, url, video_id, webpage):
706         """ Returns None if no camtasia video can be found. """
707
708         camtasia_cfg = self._search_regex(
709             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
710             webpage, 'camtasia configuration file', default=None)
711         if camtasia_cfg is None:
712             return None
713
714         title = self._html_search_meta('DC.title', webpage, fatal=True)
715
716         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
717         camtasia_cfg = self._download_xml(
718             camtasia_url, video_id,
719             note='Downloading camtasia configuration',
720             errnote='Failed to download camtasia configuration')
721         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
722
723         entries = []
724         for n in fileset_node.getchildren():
725             url_n = n.find('./uri')
726             if url_n is None:
727                 continue
728
729             entries.append({
730                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
731                 'title': '%s - %s' % (title, n.tag),
732                 'url': compat_urlparse.urljoin(url, url_n.text),
733                 'duration': float_or_none(n.find('./duration').text),
734             })
735
736         return {
737             '_type': 'playlist',
738             'entries': entries,
739             'title': title,
740         }
741
def _real_extract(self, url):
    """Extract media information from an arbitrary URL.

    The URL is processed in stages; the first stage that produces a
    result returns it:

    1. Protocol-relative URLs (``//host/...``) are prefixed with
       ``self.http_scheme()`` and handed back as a ``url`` result.
    2. Input without a scheme is handled according to the
       ``default_search`` option: retried with ``http://``, turned into
       a search query, or rejected.
    3. A HEAD request is sent to follow redirects and to recognise
       direct media links by their ``Content-Type`` header.
    4. The page is downloaded; if its first bytes are not HTML it is
       returned as a direct link.
    5. The page is checked for being an RSS feed or a Camtasia project.
    6. The page is searched for embeds of known sites, in a fixed
       order (the order matters: the first match wins).
    7. Generic player patterns (JW Player, Flowplayer, Twitter cards,
       Open Graph, HTML5 ``<video>``) and finally meta/HTTP refresh
       redirects are tried.

    Returns an info dict: a single video, a ``url``/``url_transparent``
    redirection, or a playlist.

    Raises ExtractorError when the input is not a URL and searching is
    not permitted, and UnsupportedError when nothing could be found.
    """
    if url.startswith('//'):
        return {
            '_type': 'url',
            'url': self.http_scheme() + url,
        }

    parsed_url = compat_urlparse.urlparse(url)
    if not parsed_url.scheme:
        default_search = self._downloader.params.get('default_search')
        if default_search is None:
            default_search = 'fixup_error'

        if default_search in ('auto', 'auto_warning', 'fixup_error'):
            if '/' in url:
                # Looks like a URL with the protocol left out
                self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                return self.url_result('http://' + url)
            elif default_search != 'fixup_error':
                if default_search == 'auto_warning':
                    # A literal "url" is almost certainly a placeholder
                    # copied from the documentation
                    if re.match(r'^(?:url|URL)$', url):
                        raise ExtractorError(
                            'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
                            expected=True)
                    else:
                        self._downloader.report_warning(
                            'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                return self.url_result('ytsearch:' + url)

        if default_search in ('error', 'fixup_error'):
            raise ExtractorError(
                '%r is not a valid URL. '
                'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                % (url, url), expected=True)
        else:
            # default_search names a search prefix, e.g. "ytsearch"
            if ':' not in default_search:
                default_search += ':'
            return self.url_result(default_search + url)

    url, smuggled_data = unsmuggle_url(url)
    force_videoid = None
    # Set by other extractors that delegate here on purpose; suppresses
    # the "falling back on generic" warning below
    is_intentional = smuggled_data and smuggled_data.get('to_generic')
    if smuggled_data and 'force_videoid' in smuggled_data:
        force_videoid = smuggled_data['force_videoid']
        video_id = force_videoid
    else:
        # Last path component without its extension
        video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]

    self.to_screen('%s: Requesting header' % video_id)

    head_req = HEADRequest(url)
    head_response = self._request_webpage(
        head_req, video_id,
        note=False, errnote='Could not send HEAD request to %s' % url,
        fatal=False)

    if head_response is not False:
        # Check for redirect
        new_url = head_response.geturl()
        if url != new_url:
            self.report_following_redirect(new_url)
            if force_videoid:
                # Carry the forced id over to the redirect target
                new_url = smuggle_url(
                    new_url, {'force_videoid': force_videoid})
            return self.url_result(new_url)

    full_response = None
    if head_response is False:
        # HEAD failed: fall back to a regular request and use its
        # headers instead
        full_response = self._request_webpage(url, video_id)
        head_response = full_response

    # Check for direct link to a video
    content_type = head_response.headers.get('Content-Type', '')
    m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
    if m:
        upload_date = unified_strdate(
            head_response.headers.get('Last-Modified'))
        return {
            'id': video_id,
            'title': os.path.splitext(url_basename(url))[0],
            'direct': True,
            'formats': [{
                'format_id': m.group('format_id'),
                'url': url,
                'vcodec': 'none' if m.group('type') == 'audio' else None
            }],
            'upload_date': upload_date,
        }

    if not self._downloader.params.get('test', False) and not is_intentional:
        self._downloader.report_warning('Falling back on generic information extractor.')

    if not full_response:
        full_response = self._request_webpage(url, video_id)

    # Maybe it's a direct link to a video?
    # Be careful not to download the whole thing!
    first_bytes = full_response.read(512)
    if not is_html(first_bytes):
        self._downloader.report_warning(
            'URL could be a direct video link, returning it as such.')
        upload_date = unified_strdate(
            head_response.headers.get('Last-Modified'))
        return {
            'id': video_id,
            'title': os.path.splitext(url_basename(url))[0],
            'direct': True,
            'url': url,
            'upload_date': upload_date,
        }

    # The bytes already consumed are passed back in as prefix
    webpage = self._webpage_read_content(
        full_response, url, video_id, prefix=first_bytes)

    self.report_extraction(video_id)

    # Is it an RSS feed?
    try:
        doc = parse_xml(webpage)
        if doc.tag == 'rss':
            return self._extract_rss(url, video_id, doc)
    except compat_xml_parse_error:
        # Not well-formed XML, so not a feed; carry on with HTML handling
        pass

    # Is it a Camtasia project?
    camtasia_res = self._extract_camtasia(url, video_id, webpage)
    if camtasia_res is not None:
        return camtasia_res

    # Sometimes embedded video player is hidden behind percent encoding
    # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
    # Unescaping the whole page allows to handle those cases in a generic way
    webpage = compat_urllib_parse.unquote(webpage)

    # it's tempting to parse this further, but you would
    # have to take into account all the variations like
    #   Video Title - Site Name
    #   Site Name | Video Title
    #   Video Title - Tagline | Site Name
    # and so on and so forth; it's just not practical
    video_title = self._html_search_regex(
        r'(?s)<title>(.*?)</title>', webpage, 'video title',
        default='video')

    # Try to detect age limit automatically
    age_limit = self._rta_search(webpage)
    # And then there are the jokers who advertise that they use RTA,
    # but actually don't.
    AGE_LIMIT_MARKERS = [
        r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
    ]
    if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
        age_limit = 18

    # video uploader is domain name
    # NOTE(review): for a URL without any path (e.g. http://example.com)
    # the optional scheme group is skipped on backtracking and this
    # yields 'http:' rather than the host name.
    video_uploader = self._search_regex(
        r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

    # Helper method: build a playlist of url results from regex matches.
    # getter turns a match into a URL; ie optionally pins the extractor.
    def _playlist_from_matches(matches, getter=None, ie=None):
        urlrs = orderedSet(
            self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
            for m in matches)
        return self.playlist_result(
            urlrs, playlist_id=video_id, playlist_title=video_title)

    # Look for BrightCove:
    bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
    if bc_urls:
        self.to_screen('Brightcove video detected.')
        entries = [{
            '_type': 'url',
            'url': smuggle_url(bc_url, {'Referer': url}),
            'ie_key': 'Brightcove'
        } for bc_url in bc_urls]

        return {
            '_type': 'playlist',
            'title': video_title,
            'id': video_id,
            'entries': entries,
        }

    # Look for embedded rtl.nl player
    matches = re.findall(
        r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
        webpage)
    if matches:
        return _playlist_from_matches(matches, ie='RtlNl')

    # Look for embedded (iframe) Vimeo player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
    if mobj:
        player_url = unescapeHTML(mobj.group('url'))
        surl = smuggle_url(player_url, {'Referer': url})
        return self.url_result(surl)
    # Look for embedded (swf embed) Vimeo player
    mobj = re.search(
        r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
    if mobj:
        return self.url_result(mobj.group(1))

    # Look for embedded YouTube player
    # (each findall item is a (quote, url) tuple, hence m[1] below)
    matches = re.findall(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/.+?)
        \1''', webpage)
    if matches:
        return _playlist_from_matches(
            matches, lambda m: unescapeHTML(m[1]))

    # Look for lazyYT YouTube embed
    matches = re.findall(
        r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    if matches:
        return _playlist_from_matches(matches, lambda m: unescapeHTML(m))

    # Look for embedded Dailymotion player
    matches = re.findall(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
    if matches:
        return _playlist_from_matches(
            matches, lambda m: unescapeHTML(m[1]))

    # Look for embedded Dailymotion playlist player (#3822)
    m = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
    if m:
        playlists = re.findall(
            r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
        if playlists:
            return _playlist_from_matches(
                playlists, lambda p: '//dailymotion.com/playlist/%s' % p)

    # Look for embedded Wistia player
    match = re.search(
        r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
    if match:
        embed_url = self._proto_relative_url(
            unescapeHTML(match.group('url')))
        return {
            '_type': 'url_transparent',
            'url': embed_url,
            'ie_key': 'Wistia',
            'uploader': video_uploader,
            'title': video_title,
            'id': video_id,
        }

    # Wistia embeds that only expose the video id
    match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
    if match:
        return {
            '_type': 'url_transparent',
            'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
            'ie_key': 'Wistia',
            'uploader': video_uploader,
            'title': video_title,
            'id': match.group('id')
        }

    # Look for embedded blip.tv player
    mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
    if mobj:
        return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
    mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
    if mobj:
        return self.url_result(mobj.group(1), 'BlipTV')

    # Look for embedded condenast player
    # (the closing quote is matched outside the group so that it does
    # not end up in the extracted URL)
    matches = re.findall(
        r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+)"',
        webpage)
    if matches:
        return {
            '_type': 'playlist',
            'entries': [{
                '_type': 'url',
                'ie_key': 'CondeNast',
                'url': ma,
            } for ma in matches],
            'title': video_title,
            'id': video_id,
        }

    # Look for Bandcamp pages with custom domain
    mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
    if mobj is not None:
        burl = unescapeHTML(mobj.group(1))
        # Don't set the extractor because it can be a track url or an album
        return self.url_result(burl)

    # Look for embedded Vevo player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for embedded Viddler player
    mobj = re.search(
        r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for NYTimes player
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for Libsyn player
    mobj = re.search(
        r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for Ooyala videos
    mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
            re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
            re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
    if mobj is not None:
        return OoyalaIE._build_url_result(mobj.group('ec'))

    # Look for multiple Ooyala embeds on SBN network websites
    mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
    if mobj is not None:
        embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
        if embeds:
            return _playlist_from_matches(
                embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')

    # Look for Aparat videos
    mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group(1), 'Aparat')

    # Look for MPORA videos
    mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group(1), 'Mpora')

    # Look for embedded NovaMov-based player
    mobj = re.search(
        r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                (?P<url>http://(?:(?:embed|www)\.)?
                    (?:novamov\.com|
                       nowvideo\.(?:ch|sx|eu|at|ag|co)|
                       videoweed\.(?:es|com)|
                       movshare\.(?:net|sx|ag)|
                       divxstage\.(?:eu|net|ch|co|at|ag))
                    /embed\.php.+?)\1''', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))

    # Look for embedded Facebook player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Facebook')

    # Look for embedded VK player
    mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'VK')

    # Look for embedded ivi player
    mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Ivi')

    # Look for embedded Huffington Post player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'HuffPost')

    # Look for embed.ly
    # ([^>]* lets other attributes sit between class and href/src)
    mobj = re.search(r'class=["\']embedly-card["\'][^>]*href=["\'](?P<url>[^"\']+)', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'))
    mobj = re.search(r'class=["\']embedly-embed["\'][^>]*src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
    if mobj is not None:
        return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))

    # Look for funnyordie embed
    matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
    if matches:
        return _playlist_from_matches(
            matches, getter=unescapeHTML, ie='FunnyOrDie')

    # Look for BBC iPlayer embed
    matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
    if matches:
        return _playlist_from_matches(matches, ie='BBCCoUk')

    # Look for embedded RUTV player
    rutv_url = RUTVIE._extract_url(webpage)
    if rutv_url:
        return self.url_result(rutv_url, 'RUTV')

    # Look for embedded TED player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'TED')

    # Look for embedded Ustream videos
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Ustream')

    # Look for embedded arte.tv player
    mobj = re.search(
        r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'ArteTVEmbed')

    # Look for embedded smotri.com player
    smotri_url = SmotriIE._extract_url(webpage)
    if smotri_url:
        return self.url_result(smotri_url, 'Smotri')

    # Look for embedded soundcloud player
    mobj = re.search(
        r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
        webpage)
    if mobj is not None:
        player_url = unescapeHTML(mobj.group('url'))
        return self.url_result(player_url)

    # Look for embedded vulture.com player
    mobj = re.search(
        r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
        webpage)
    if mobj is not None:
        player_url = unescapeHTML(mobj.group('url'))
        return self.url_result(player_url, ie='Vulture')

    # Look for embedded mtvservices player
    mobj = re.search(
        r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
        webpage)
    if mobj is not None:
        player_url = unescapeHTML(mobj.group('url'))
        return self.url_result(player_url, ie='MTVServicesEmbedded')

    # Look for embedded yahoo player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Yahoo')

    # Look for embedded sbs.com.au player
    mobj = re.search(
        r'''(?x)
        (?:
            <meta\s+property="og:video"\s+content=|
            <iframe[^>]+?src=
        )
        (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'SBS')

    # Look for embedded Cinchcast player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Cinchcast')

    # Look for embedded MLB player
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'MLB')

    # Look for Conde Nast embeds matching the extractor's own embed pattern
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
        webpage)
    if mobj is not None:
        return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

    # Look for embedded new.livestream.com player
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
        webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Livestream')

    # Look for Zapiks embed
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Zapiks')

    # Look for Kaltura embeds
    mobj = re.search(
        r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
    if mobj is not None:
        return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')

    # Look for Eagle.Platform embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'EaglePlatform')

    # Look for ClipYou (uses Eagle.Platform) embeds
    mobj = re.search(
        r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
    if mobj is not None:
        return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')

    # Look for Pladform embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
    if mobj is not None:
        return self.url_result(mobj.group('url'), 'Pladform')

    # Look for 5min embeds
    mobj = re.search(
        r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
    if mobj is not None:
        return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')

    # Look for NBC Sports VPlayer embeds
    nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
    if nbc_sports_url:
        return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')

    # Look for UDN embeds
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
    if mobj is not None:
        return self.url_result(
            url_infer_protocol(url, mobj.group('url')), 'UDNEmbed')

    # No known embed matched; fall back to generic player heuristics.

    def check_video(vurl):
        # Accept YouTube URLs and anything whose path has an extension
        # that is not a known non-video (flash, image, subtitle) one
        if YoutubeIE.suitable(vurl):
            return True
        vpath = compat_urlparse.urlparse(vurl).path
        vext = determine_ext(vpath)
        return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')

    def filter_video(urls):
        return list(filter(check_video, urls))

    # Start with something easy: JW Player in SWFObject
    found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
    if not found:
        # Look for gorilla-vid style embedding
        found = filter_video(re.findall(r'''(?sx)
            (?:
                jw_plugins|
                JWPlayerOptions|
                jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
            )
            .*?
            ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
    if not found:
        # Broaden the search a little bit
        found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
    if not found:
        # Broaden the findall a little bit: JWPlayer JS loader
        found = filter_video(re.findall(
            r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
    if not found:
        # Flow player
        found = filter_video(re.findall(r'''(?xs)
            flowplayer\("[^"]+",\s*
                \{[^}]+?\}\s*,
                \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                    ["']?url["']?\s*:\s*["']([^"']+)["']
        ''', webpage))
    if not found:
        # Cinerama player
        found = re.findall(
            r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
    if not found:
        # Try to find twitter cards info
        found = filter_video(re.findall(
            r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
    if not found:
        # We look for Open Graph info:
        # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
        m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
        # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
        # NOTE(review): re.findall never returns None, so this condition
        # is always true and og:video is consulted regardless of the
        # declared type. Left as is because tightening it would drop
        # pages that declare og:video without og:video:type.
        if m_video_type is not None:
            found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
    if not found:
        # HTML5 video
        found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
    if not found:
        # Last resort: follow a meta refresh / Refresh header redirect.
        # From here on found is a match object (or None), not a list.
        REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
        found = re.search(
            r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
            r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
            webpage)
        if not found:
            # Look also in Refresh HTTP header
            refresh_header = head_response.headers.get('Refresh')
            if refresh_header:
                found = re.search(REDIRECT_REGEX, refresh_header)
        if found:
            new_url = found.group(1)
            self.report_following_redirect(new_url)
            return {
                '_type': 'url',
                'url': new_url,
            }
    if not found:
        raise UnsupportedError(url)

    entries = []
    for video_url in found:
        # Resolve relative media URLs against the page URL
        video_url = compat_urlparse.urljoin(url, video_url)
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

        # Sometimes, jwplayer extraction will result in a YouTube URL
        if YoutubeIE.suitable(video_url):
            entries.append(self.url_result(video_url, 'Youtube'))
            continue

        # here's a fun little line of code for you:
        video_id = os.path.splitext(video_id)[0]

        entries.append({
            'id': video_id,
            'url': video_url,
            'uploader': video_uploader,
            'title': video_title,
            'age_limit': age_limit,
        })

    if len(entries) == 1:
        return entries[0]
    else:
        # Several candidates share the page title; number them to
        # keep the titles distinct
        for num, e in enumerate(entries, start=1):
            # 'url' results don't have a title
            if e.get('title') is not None:
                e['title'] = '%s (%d)' % (e['title'], num)
        return {
            '_type': 'playlist',
            'entries': entries,
        }