[Lecture2Go] Add new extractor
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urlparse,
13     compat_xml_parse_error,
14 )
15 from ..utils import (
16     determine_ext,
17     ExtractorError,
18     float_or_none,
19     HEADRequest,
20     is_html,
21     orderedSet,
22     parse_xml,
23     smuggle_url,
24     unescapeHTML,
25     unified_strdate,
26     unsmuggle_url,
27     UnsupportedError,
28     url_basename,
29     xpath_text,
30 )
31 from .brightcove import BrightcoveIE
32 from .nbc import NBCSportsVPlayerIE
33 from .ooyala import OoyalaIE
34 from .rutv import RUTVIE
35 from .smotri import SmotriIE
36 from .condenast import CondeNastIE
37 from .udn import UDNEmbedIE
38
39
40 class GenericIE(InfoExtractor):
41     IE_DESC = 'Generic downloader that works on some sites'
42     _VALID_URL = r'.*'
43     IE_NAME = 'generic'
44     _TESTS = [
45         {
46             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
47             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
48             'info_dict': {
49                 'id': '13601338388002',
50                 'ext': 'mp4',
51                 'uploader': 'www.hodiho.fr',
52                 'title': 'R\u00e9gis plante sa Jeep',
53             }
54         },
55         # bandcamp page with custom domain
56         {
57             'add_ie': ['Bandcamp'],
58             'url': 'http://bronyrock.com/track/the-pony-mash',
59             'info_dict': {
60                 'id': '3235767654',
61                 'ext': 'mp3',
62                 'title': 'The Pony Mash',
63                 'uploader': 'M_Pallante',
64             },
65             'skip': 'There is a limit of 200 free downloads / month for the test song',
66         },
67         # embedded brightcove video
68         # it also tests brightcove videos that need to set the 'Referer' in the
69         # http requests
70         {
71             'add_ie': ['Brightcove'],
72             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
73             'info_dict': {
74                 'id': '2765128793001',
75                 'ext': 'mp4',
76                 'title': 'Le cours de bourse : l’analyse technique',
77                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
78                 'uploader': 'BFM BUSINESS',
79             },
80             'params': {
81                 'skip_download': True,
82             },
83         },
84         {
85             # https://github.com/rg3/youtube-dl/issues/2253
86             'url': 'http://bcove.me/i6nfkrc3',
87             'md5': '0ba9446db037002366bab3b3eb30c88c',
88             'info_dict': {
89                 'id': '3101154703001',
90                 'ext': 'mp4',
91                 'title': 'Still no power',
92                 'uploader': 'thestar.com',
93                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
94             },
95             'add_ie': ['Brightcove'],
96         },
97         {
98             'url': 'http://www.championat.com/video/football/v/87/87499.html',
99             'md5': 'fb973ecf6e4a78a67453647444222983',
100             'info_dict': {
101                 'id': '3414141473001',
102                 'ext': 'mp4',
103                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
104                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
105                 'uploader': 'Championat',
106             },
107         },
108         {
109             # https://github.com/rg3/youtube-dl/issues/3541
110             'add_ie': ['Brightcove'],
111             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
112             'info_dict': {
113                 'id': '3866516442001',
114                 'ext': 'mp4',
115                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
116                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
117                 'uploader': 'SBS Broadcasting',
118             },
119             'skip': 'Restricted to Netherlands',
120             'params': {
121                 'skip_download': True,  # m3u8 download
122             },
123         },
124         # Direct link to a video
125         {
126             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
127             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
128             'info_dict': {
129                 'id': 'trailer',
130                 'ext': 'mp4',
131                 'title': 'trailer',
132                 'upload_date': '20100513',
133             }
134         },
135         # ooyala video
136         {
137             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
138             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
139             'info_dict': {
140                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
141                 'ext': 'mp4',
142                 'title': '2cc213299525360.mov',  # that's what we get
143             },
144             'add_ie': ['Ooyala'],
145         },
146         # multiple ooyala embeds on SBN network websites
147         {
148             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
149             'info_dict': {
150                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
151                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
152             },
153             'playlist_mincount': 3,
154             'params': {
155                 'skip_download': True,
156             },
157             'add_ie': ['Ooyala'],
158         },
159         # google redirect
160         {
161             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
162             'info_dict': {
163                 'id': 'cmQHVoWB5FY',
164                 'ext': 'mp4',
165                 'upload_date': '20130224',
166                 'uploader_id': 'TheVerge',
167                 'description': 're:^Chris Ziegler takes a look at the\.*',
168                 'uploader': 'The Verge',
169                 'title': 'First Firefox OS phones side-by-side',
170             },
171             'params': {
172                 'skip_download': False,
173             }
174         },
175         # embed.ly video
176         {
177             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
178             'info_dict': {
179                 'id': '9ODmcdjQcHQ',
180                 'ext': 'mp4',
181                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
182                 'upload_date': '20140225',
183                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
184                 'uploader': 'Tested',
185                 'uploader_id': 'testedcom',
186             },
187             # No need to test YoutubeIE here
188             'params': {
189                 'skip_download': True,
190             },
191         },
192         # funnyordie embed
193         {
194             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
195             'info_dict': {
196                 'id': '18e820ec3f',
197                 'ext': 'mp4',
198                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
199                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
200             },
201         },
202         # BBC iPlayer embeds
203         {
204             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
205             'info_dict': {
206                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
207             },
208             'playlist_mincount': 18,
209         },
210         # RUTV embed
211         {
212             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
213             'info_dict': {
214                 'id': '776940',
215                 'ext': 'mp4',
216                 'title': 'Охотское море стало целиком российским',
217                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
218             },
219             'params': {
220                 # m3u8 download
221                 'skip_download': True,
222             },
223         },
224         # Embedded TED video
225         {
226             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
227             'md5': '65fdff94098e4a607385a60c5177c638',
228             'info_dict': {
229                 'id': '1969',
230                 'ext': 'mp4',
231                 'title': 'Hidden miracles of the natural world',
232                 'uploader': 'Louie Schwartzberg',
233                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
234             }
235         },
236         # Embedded Ustream video
237         {
238             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
239             'md5': '27b99cdb639c9b12a79bca876a073417',
240             'info_dict': {
241                 'id': '45734260',
242                 'ext': 'flv',
243                 'uploader': 'AU SPA:  The NSA and Privacy',
244                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
245             }
246         },
247         # nowvideo embed hidden behind percent encoding
248         {
249             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
250             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
251             'info_dict': {
252                 'id': '06e53103ca9aa',
253                 'ext': 'flv',
254                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
255                 'description': 'No description',
256             },
257         },
258         # arte embed
259         {
260             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
261             'md5': '7653032cbb25bf6c80d80f217055fa43',
262             'info_dict': {
263                 'id': '048195-004_PLUS7-F',
264                 'ext': 'flv',
265                 'title': 'X:enius',
266                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
267                 'upload_date': '20140320',
268             },
269             'params': {
270                 'skip_download': 'Requires rtmpdump'
271             }
272         },
273         # Condé Nast embed
274         {
275             'url': 'http://www.wired.com/2014/04/honda-asimo/',
276             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
277             'info_dict': {
278                 'id': '53501be369702d3275860000',
279                 'ext': 'mp4',
280                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
281             }
282         },
283         # Dailymotion embed
284         {
285             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
286             'md5': '441aeeb82eb72c422c7f14ec533999cd',
287             'info_dict': {
288                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
289                 'ext': 'mp4',
290                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
291                 'uploader': 'Spi0n',
292             },
293             'add_ie': ['Dailymotion'],
294         },
295         # YouTube embed
296         {
297             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
298             'info_dict': {
299                 'id': 'FXRb4ykk4S0',
300                 'ext': 'mp4',
301                 'title': 'The NBL Auction 2014',
302                 'uploader': 'BADMINTON England',
303                 'uploader_id': 'BADMINTONEvents',
304                 'upload_date': '20140603',
305                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
306             },
307             'add_ie': ['Youtube'],
308             'params': {
309                 'skip_download': True,
310             }
311         },
312         # MTVServices embed
313         {
314             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
315             'md5': '35727f82f58c76d996fc188f9755b0d5',
316             'info_dict': {
317                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
318                 'ext': 'mp4',
319                 'title': 'Review',
320                 'description': 'Mario\'s life in the fast lane has never looked so good.',
321             },
322         },
323         # YouTube embed via <data-embed-url="">
324         {
325             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
326             'info_dict': {
327                 'id': '4vAffPZIT44',
328                 'ext': 'mp4',
329                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
330                 'uploader': 'Gameloft',
331                 'uploader_id': 'gameloft',
332                 'upload_date': '20140828',
333                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
334             },
335             'params': {
336                 'skip_download': True,
337             }
338         },
339         # Camtasia studio
340         {
341             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
342             'playlist': [{
343                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
344                 'info_dict': {
345                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
346                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
347                     'ext': 'flv',
348                     'duration': 2235.90,
349                 }
350             }, {
351                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
352                 'info_dict': {
353                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
354                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
355                     'ext': 'flv',
356                     'duration': 2235.93,
357                 }
358             }],
359             'info_dict': {
360                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
361             }
362         },
363         # Flowplayer
364         {
365             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
366             'md5': '9d65602bf31c6e20014319c7d07fba27',
367             'info_dict': {
368                 'id': '5123ea6d5e5a7',
369                 'ext': 'mp4',
370                 'age_limit': 18,
371                 'uploader': 'www.handjobhub.com',
372                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
373             }
374         },
375         # RSS feed
376         {
377             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
378             'info_dict': {
379                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
380                 'title': 'Zero Punctuation',
381                 'description': 're:.*groundbreaking video review series.*'
382             },
383             'playlist_mincount': 11,
384         },
385         # Multiple brightcove videos
386         # https://github.com/rg3/youtube-dl/issues/2283
387         {
388             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
389             'info_dict': {
390                 'id': 'always-never',
391                 'title': 'Always / Never - The New Yorker',
392             },
393             'playlist_count': 3,
394             'params': {
395                 'extract_flat': False,
396                 'skip_download': True,
397             }
398         },
399         # MLB embed
400         {
401             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
402             'md5': '96f09a37e44da40dd083e12d9a683327',
403             'info_dict': {
404                 'id': '33322633',
405                 'ext': 'mp4',
406                 'title': 'Ump changes call to ball',
407                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
408                 'duration': 48,
409                 'timestamp': 1401537900,
410                 'upload_date': '20140531',
411                 'thumbnail': 're:^https?://.*\.jpg$',
412             },
413         },
414         # Wistia embed
415         {
416             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
417             'md5': '8788b683c777a5cf25621eaf286d0c23',
418             'info_dict': {
419                 'id': '1cfaf6b7ea',
420                 'ext': 'mov',
421                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
422                 'duration': 643.0,
423                 'filesize': 182808282,
424                 'uploader': 'education-portal.com',
425             },
426         },
427         {
428             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
429             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
430             'info_dict': {
431                 'id': 'uxjb0lwrcz',
432                 'ext': 'mp4',
433                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
434                 'duration': 1715.0,
435                 'uploader': 'thoughtworks.wistia.com',
436             },
437         },
438         # Direct download with broken HEAD
439         {
440             'url': 'http://ai-radio.org:8000/radio.opus',
441             'info_dict': {
442                 'id': 'radio',
443                 'ext': 'opus',
444                 'title': 'radio',
445             },
446             'params': {
447                 'skip_download': True,  # infinite live stream
448             },
449             'expected_warnings': [
450                 r'501.*Not Implemented'
451             ],
452         },
453         # Soundcloud embed
454         {
455             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
456             'info_dict': {
457                 'id': '174391317',
458                 'ext': 'mp3',
459                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
460                 'uploader': 'Sophos Security',
461                 'title': 'Chet Chat 171 - Oct 29, 2014',
462                 'upload_date': '20141029',
463             }
464         },
465         # Livestream embed
466         {
467             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
468             'info_dict': {
469                 'id': '67864563',
470                 'ext': 'flv',
471                 'upload_date': '20141112',
472                 'title': 'Rosetta #CometLanding webcast HL 10',
473             }
474         },
475         # LazyYT
476         {
477             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
478             'info_dict': {
479                 'id': '1986',
480                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
481             },
482             'playlist_mincount': 2,
483         },
484         # Direct link with incorrect MIME type
485         {
486             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
487             'md5': '4ccbebe5f36706d85221f204d7eb5913',
488             'info_dict': {
489                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
490                 'id': '5_Lennart_Poettering_-_Systemd',
491                 'ext': 'webm',
492                 'title': '5_Lennart_Poettering_-_Systemd',
493                 'upload_date': '20141120',
494             },
495             'expected_warnings': [
496                 'URL could be a direct video link, returning it as such.'
497             ]
498         },
499         # Cinchcast embed
500         {
501             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
502             'info_dict': {
503                 'id': '7141703',
504                 'ext': 'mp3',
505                 'upload_date': '20141126',
506                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
507             }
508         },
509         # Cinerama player
510         {
511             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
512             'info_dict': {
513                 'id': '730m_DandD_1901_512k',
514                 'ext': 'mp4',
515                 'uploader': 'www.abc.net.au',
516                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
517             }
518         },
519         # embedded viddler video
520         {
521             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
522             'info_dict': {
523                 'id': '4d03aad9',
524                 'ext': 'mp4',
525                 'uploader': 'deadspin',
526                 'title': 'WALL-TO-GORTAT',
527                 'timestamp': 1422285291,
528                 'upload_date': '20150126',
529             },
530             'add_ie': ['Viddler'],
531         },
532         # Libsyn embed
533         {
534             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
535             'info_dict': {
536                 'id': '3377616',
537                 'ext': 'mp3',
538                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
539                 'description': 'md5:601cb790edd05908957dae8aaa866465',
540                 'upload_date': '20150220',
541             },
542         },
543         # jwplayer YouTube
544         {
545             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
546             'info_dict': {
547                 'id': 'Mrj4DVp2zeA',
548                 'ext': 'mp4',
549                 'upload_date': '20150212',
550                 'uploader': 'The National Archives UK',
551                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
552                 'uploader_id': 'NationalArchives08',
553                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
554             },
555         },
556         # rtl.nl embed
557         {
558             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
559             'playlist_mincount': 5,
560             'info_dict': {
561                 'id': 'aanslagen-kopenhagen',
562                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
563             }
564         },
565         # Zapiks embed
566         {
567             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
568             'info_dict': {
569                 'id': '118046',
570                 'ext': 'mp4',
571                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
572             }
573         },
574         # Kaltura embed
575         {
576             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
577             'info_dict': {
578                 'id': '1_eergr3h1',
579                 'ext': 'mp4',
580                 'upload_date': '20150226',
581                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
582                 'timestamp': int,
583                 'title': 'John Carlson Postgame 2/25/15',
584             },
585         },
586         # Eagle.Platform embed (generic URL)
587         {
588             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
589             'info_dict': {
590                 'id': '227304',
591                 'ext': 'mp4',
592                 'title': 'Навальный вышел на свободу',
593                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
594                 'thumbnail': 're:^https?://.*\.jpg$',
595                 'duration': 87,
596                 'view_count': int,
597                 'age_limit': 0,
598             },
599         },
600         # ClipYou (Eagle.Platform) embed (custom URL)
601         {
602             'url': 'http://muz-tv.ru/play/7129/',
603             'info_dict': {
604                 'id': '12820',
605                 'ext': 'mp4',
606                 'title': "'O Sole Mio",
607                 'thumbnail': 're:^https?://.*\.jpg$',
608                 'duration': 216,
609                 'view_count': int,
610             },
611         },
612         # Pladform embed
613         {
614             'url': 'http://muz-tv.ru/kinozal/view/7400/',
615             'info_dict': {
616                 'id': '100183293',
617                 'ext': 'mp4',
618                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
619                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
620                 'thumbnail': 're:^https?://.*\.jpg$',
621                 'duration': 694,
622                 'age_limit': 0,
623             },
624         },
625         # 5min embed
626         {
627             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
628             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
629             'info_dict': {
630                 'id': '518726732',
631                 'ext': 'mp4',
632                 'title': 'Facebook Creates "On This Day" | Crunch Report',
633             },
634         },
635         # RSS feed with enclosure
636         {
637             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
638             'info_dict': {
639                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
640                 'ext': 'm4v',
641                 'upload_date': '20150228',
642                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
643             }
644         },
645         # Crooks and Liars embed
646         {
647             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
648             'info_dict': {
649                 'id': '8RUoRhRi',
650                 'ext': 'mp4',
651                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
652                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
653                 'timestamp': 1428207000,
654                 'upload_date': '20150405',
655                 'uploader': 'Heather',
656             },
657         },
658         # Crooks and Liars external embed
659         {
660             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
661             'info_dict': {
662                 'id': 'MTE3MjUtMzQ2MzA',
663                 'ext': 'mp4',
664                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
665                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
666                 'timestamp': 1265032391,
667                 'upload_date': '20100201',
668                 'uploader': 'Heather',
669             },
670         },
671         # NBC Sports vplayer embed
672         {
673             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
674             'info_dict': {
675                 'id': 'ln7x1qSThw4k',
676                 'ext': 'flv',
677                 'title': "PFT Live: New leader in the 'new-look' defense",
678                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
679             },
680         },
681         # UDN embed
682         {
683             'url': 'http://www.udn.com/news/story/7314/822787',
684             'md5': 'fd2060e988c326991037b9aff9df21a6',
685             'info_dict': {
686                 'id': '300346',
687                 'ext': 'mp4',
688                 'title': '中一中男師變性 全校師生力挺',
689                 'thumbnail': 're:^https?://.*\.jpg$',
690             }
691         },
692         # Ooyala embed
693         {
694             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
695             'info_dict': {
696                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
697                 'ext': 'mp4',
698                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
699                 'title': 'This is what separates the Excel masters from the wannabes',
700             },
701             'params': {
702                 # m3u8 downloads
703                 'skip_download': True,
704             }
705         }
706     ]
707
708     def report_following_redirect(self, new_url):
709         """Report information extraction."""
710         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
711
712     def _extract_rss(self, url, video_id, doc):
713         playlist_title = doc.find('./channel/title').text
714         playlist_desc_el = doc.find('./channel/description')
715         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
716
717         entries = []
718         for it in doc.findall('./channel/item'):
719             next_url = xpath_text(it, 'link', fatal=False)
720             if not next_url:
721                 enclosure_nodes = it.findall('./enclosure')
722                 for e in enclosure_nodes:
723                     next_url = e.attrib.get('url')
724                     if next_url:
725                         break
726
727             if not next_url:
728                 continue
729
730             entries.append({
731                 '_type': 'url',
732                 'url': next_url,
733                 'title': it.find('title').text,
734             })
735
736         return {
737             '_type': 'playlist',
738             'id': url,
739             'title': playlist_title,
740             'description': playlist_desc,
741             'entries': entries,
742         }
743
744     def _extract_camtasia(self, url, video_id, webpage):
745         """ Returns None if no camtasia video can be found. """
746
747         camtasia_cfg = self._search_regex(
748             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
749             webpage, 'camtasia configuration file', default=None)
750         if camtasia_cfg is None:
751             return None
752
753         title = self._html_search_meta('DC.title', webpage, fatal=True)
754
755         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
756         camtasia_cfg = self._download_xml(
757             camtasia_url, video_id,
758             note='Downloading camtasia configuration',
759             errnote='Failed to download camtasia configuration')
760         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
761
762         entries = []
763         for n in fileset_node.getchildren():
764             url_n = n.find('./uri')
765             if url_n is None:
766                 continue
767
768             entries.append({
769                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
770                 'title': '%s - %s' % (title, n.tag),
771                 'url': compat_urlparse.urljoin(url, url_n.text),
772                 'duration': float_or_none(n.find('./duration').text),
773             })
774
775         return {
776             '_type': 'playlist',
777             'entries': entries,
778             'title': title,
779         }
780
781     def _real_extract(self, url):
782         if url.startswith('//'):
783             return {
784                 '_type': 'url',
785                 'url': self.http_scheme() + url,
786             }
787
788         parsed_url = compat_urlparse.urlparse(url)
789         if not parsed_url.scheme:
790             default_search = self._downloader.params.get('default_search')
791             if default_search is None:
792                 default_search = 'fixup_error'
793
794             if default_search in ('auto', 'auto_warning', 'fixup_error'):
795                 if '/' in url:
796                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
797                     return self.url_result('http://' + url)
798                 elif default_search != 'fixup_error':
799                     if default_search == 'auto_warning':
800                         if re.match(r'^(?:url|URL)$', url):
801                             raise ExtractorError(
802                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
803                                 expected=True)
804                         else:
805                             self._downloader.report_warning(
806                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
807                     return self.url_result('ytsearch:' + url)
808
809             if default_search in ('error', 'fixup_error'):
810                 raise ExtractorError(
811                     '%r is not a valid URL. '
812                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
813                     % (url, url), expected=True)
814             else:
815                 if ':' not in default_search:
816                     default_search += ':'
817                 return self.url_result(default_search + url)
818
819         url, smuggled_data = unsmuggle_url(url)
820         force_videoid = None
821         is_intentional = smuggled_data and smuggled_data.get('to_generic')
822         if smuggled_data and 'force_videoid' in smuggled_data:
823             force_videoid = smuggled_data['force_videoid']
824             video_id = force_videoid
825         else:
826             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
827
828         self.to_screen('%s: Requesting header' % video_id)
829
830         head_req = HEADRequest(url)
831         head_response = self._request_webpage(
832             head_req, video_id,
833             note=False, errnote='Could not send HEAD request to %s' % url,
834             fatal=False)
835
836         if head_response is not False:
837             # Check for redirect
838             new_url = head_response.geturl()
839             if url != new_url:
840                 self.report_following_redirect(new_url)
841                 if force_videoid:
842                     new_url = smuggle_url(
843                         new_url, {'force_videoid': force_videoid})
844                 return self.url_result(new_url)
845
846         full_response = None
847         if head_response is False:
848             full_response = self._request_webpage(url, video_id)
849             head_response = full_response
850
851         # Check for direct link to a video
852         content_type = head_response.headers.get('Content-Type', '')
853         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
854         if m:
855             upload_date = unified_strdate(
856                 head_response.headers.get('Last-Modified'))
857             return {
858                 'id': video_id,
859                 'title': os.path.splitext(url_basename(url))[0],
860                 'direct': True,
861                 'formats': [{
862                     'format_id': m.group('format_id'),
863                     'url': url,
864                     'vcodec': 'none' if m.group('type') == 'audio' else None
865                 }],
866                 'upload_date': upload_date,
867             }
868
869         if not self._downloader.params.get('test', False) and not is_intentional:
870             self._downloader.report_warning('Falling back on generic information extractor.')
871
872         if not full_response:
873             full_response = self._request_webpage(url, video_id)
874
875         # Maybe it's a direct link to a video?
876         # Be careful not to download the whole thing!
877         first_bytes = full_response.read(512)
878         if not is_html(first_bytes):
879             self._downloader.report_warning(
880                 'URL could be a direct video link, returning it as such.')
881             upload_date = unified_strdate(
882                 head_response.headers.get('Last-Modified'))
883             return {
884                 'id': video_id,
885                 'title': os.path.splitext(url_basename(url))[0],
886                 'direct': True,
887                 'url': url,
888                 'upload_date': upload_date,
889             }
890
891         webpage = self._webpage_read_content(
892             full_response, url, video_id, prefix=first_bytes)
893
894         self.report_extraction(video_id)
895
896         # Is it an RSS feed?
897         try:
898             doc = parse_xml(webpage)
899             if doc.tag == 'rss':
900                 return self._extract_rss(url, video_id, doc)
901         except compat_xml_parse_error:
902             pass
903
904         # Is it a Camtasia project?
905         camtasia_res = self._extract_camtasia(url, video_id, webpage)
906         if camtasia_res is not None:
907             return camtasia_res
908
909         # Sometimes embedded video player is hidden behind percent encoding
910         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
911         # Unescaping the whole page allows to handle those cases in a generic way
912         webpage = compat_urllib_parse.unquote(webpage)
913
914         # it's tempting to parse this further, but you would
915         # have to take into account all the variations like
916         #   Video Title - Site Name
917         #   Site Name | Video Title
918         #   Video Title - Tagline | Site Name
919         # and so on and so forth; it's just not practical
920         video_title = self._html_search_regex(
921             r'(?s)<title>(.*?)</title>', webpage, 'video title',
922             default='video')
923
924         # Try to detect age limit automatically
925         age_limit = self._rta_search(webpage)
926         # And then there are the jokers who advertise that they use RTA,
927         # but actually don't.
928         AGE_LIMIT_MARKERS = [
929             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
930         ]
931         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
932             age_limit = 18
933
934         # video uploader is domain name
935         video_uploader = self._search_regex(
936             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
937
938         # Helper method
939         def _playlist_from_matches(matches, getter=None, ie=None):
940             urlrs = orderedSet(
941                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
942                 for m in matches)
943             return self.playlist_result(
944                 urlrs, playlist_id=video_id, playlist_title=video_title)
945
946         # Look for BrightCove:
947         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
948         if bc_urls:
949             self.to_screen('Brightcove video detected.')
950             entries = [{
951                 '_type': 'url',
952                 'url': smuggle_url(bc_url, {'Referer': url}),
953                 'ie_key': 'Brightcove'
954             } for bc_url in bc_urls]
955
956             return {
957                 '_type': 'playlist',
958                 'title': video_title,
959                 'id': video_id,
960                 'entries': entries,
961             }
962
963         # Look for embedded rtl.nl player
964         matches = re.findall(
965             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
966             webpage)
967         if matches:
968             return _playlist_from_matches(matches, ie='RtlNl')
969
970         # Look for embedded (iframe) Vimeo player
971         mobj = re.search(
972             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
973         if mobj:
974             player_url = unescapeHTML(mobj.group('url'))
975             surl = smuggle_url(player_url, {'Referer': url})
976             return self.url_result(surl)
977         # Look for embedded (swf embed) Vimeo player
978         mobj = re.search(
979             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
980         if mobj:
981             return self.url_result(mobj.group(1))
982
983         # Look for embedded YouTube player
984         matches = re.findall(r'''(?x)
985             (?:
986                 <iframe[^>]+?src=|
987                 data-video-url=|
988                 <embed[^>]+?src=|
989                 embedSWF\(?:\s*|
990                 new\s+SWFObject\(
991             )
992             (["\'])
993                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
994                 (?:embed|v|p)/.+?)
995             \1''', webpage)
996         if matches:
997             return _playlist_from_matches(
998                 matches, lambda m: unescapeHTML(m[1]))
999
1000         # Look for lazyYT YouTube embed
1001         matches = re.findall(
1002             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1003         if matches:
1004             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1005
1006         # Look for embedded Dailymotion player
1007         matches = re.findall(
1008             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1009         if matches:
1010             return _playlist_from_matches(
1011                 matches, lambda m: unescapeHTML(m[1]))
1012
1013         # Look for embedded Dailymotion playlist player (#3822)
1014         m = re.search(
1015             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1016         if m:
1017             playlists = re.findall(
1018                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1019             if playlists:
1020                 return _playlist_from_matches(
1021                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1022
1023         # Look for embedded Wistia player
1024         match = re.search(
1025             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1026         if match:
1027             embed_url = self._proto_relative_url(
1028                 unescapeHTML(match.group('url')))
1029             return {
1030                 '_type': 'url_transparent',
1031                 'url': embed_url,
1032                 'ie_key': 'Wistia',
1033                 'uploader': video_uploader,
1034                 'title': video_title,
1035                 'id': video_id,
1036             }
1037
1038         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1039         if match:
1040             return {
1041                 '_type': 'url_transparent',
1042                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1043                 'ie_key': 'Wistia',
1044                 'uploader': video_uploader,
1045                 'title': video_title,
1046                 'id': match.group('id')
1047             }
1048
1049         # Look for embedded blip.tv player
1050         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
1051         if mobj:
1052             return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
1053         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
1054         if mobj:
1055             return self.url_result(mobj.group(1), 'BlipTV')
1056
1057         # Look for embedded condenast player
1058         matches = re.findall(
1059             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1060             webpage)
1061         if matches:
1062             return {
1063                 '_type': 'playlist',
1064                 'entries': [{
1065                     '_type': 'url',
1066                     'ie_key': 'CondeNast',
1067                     'url': ma,
1068                 } for ma in matches],
1069                 'title': video_title,
1070                 'id': video_id,
1071             }
1072
1073         # Look for Bandcamp pages with custom domain
1074         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1075         if mobj is not None:
1076             burl = unescapeHTML(mobj.group(1))
1077             # Don't set the extractor because it can be a track url or an album
1078             return self.url_result(burl)
1079
1080         # Look for embedded Vevo player
1081         mobj = re.search(
1082             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1083         if mobj is not None:
1084             return self.url_result(mobj.group('url'))
1085
1086         # Look for embedded Viddler player
1087         mobj = re.search(
1088             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1089             webpage)
1090         if mobj is not None:
1091             return self.url_result(mobj.group('url'))
1092
1093         # Look for NYTimes player
1094         mobj = re.search(
1095             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1096             webpage)
1097         if mobj is not None:
1098             return self.url_result(mobj.group('url'))
1099
1100         # Look for Libsyn player
1101         mobj = re.search(
1102             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1103         if mobj is not None:
1104             return self.url_result(mobj.group('url'))
1105
1106         # Look for Ooyala videos
1107         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1108                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1109                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1110                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1111         if mobj is not None:
1112             return OoyalaIE._build_url_result(mobj.group('ec'))
1113
1114         # Look for multiple Ooyala embeds on SBN network websites
1115         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1116         if mobj is not None:
1117             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1118             if embeds:
1119                 return _playlist_from_matches(
1120                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1121
1122         # Look for Aparat videos
1123         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1124         if mobj is not None:
1125             return self.url_result(mobj.group(1), 'Aparat')
1126
1127         # Look for MPORA videos
1128         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1129         if mobj is not None:
1130             return self.url_result(mobj.group(1), 'Mpora')
1131
1132         # Look for embedded NovaMov-based player
1133         mobj = re.search(
1134             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1135                     (?P<url>http://(?:(?:embed|www)\.)?
1136                         (?:novamov\.com|
1137                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1138                            videoweed\.(?:es|com)|
1139                            movshare\.(?:net|sx|ag)|
1140                            divxstage\.(?:eu|net|ch|co|at|ag))
1141                         /embed\.php.+?)\1''', webpage)
1142         if mobj is not None:
1143             return self.url_result(mobj.group('url'))
1144
1145         # Look for embedded Facebook player
1146         mobj = re.search(
1147             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1148         if mobj is not None:
1149             return self.url_result(mobj.group('url'), 'Facebook')
1150
1151         # Look for embedded VK player
1152         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1153         if mobj is not None:
1154             return self.url_result(mobj.group('url'), 'VK')
1155
1156         # Look for embedded ivi player
1157         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1158         if mobj is not None:
1159             return self.url_result(mobj.group('url'), 'Ivi')
1160
1161         # Look for embedded Huffington Post player
1162         mobj = re.search(
1163             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1164         if mobj is not None:
1165             return self.url_result(mobj.group('url'), 'HuffPost')
1166
1167         # Look for embed.ly
1168         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1169         if mobj is not None:
1170             return self.url_result(mobj.group('url'))
1171         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1172         if mobj is not None:
1173             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1174
1175         # Look for funnyordie embed
1176         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1177         if matches:
1178             return _playlist_from_matches(
1179                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1180
1181         # Look for BBC iPlayer embed
1182         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1183         if matches:
1184             return _playlist_from_matches(matches, ie='BBCCoUk')
1185
1186         # Look for embedded RUTV player
1187         rutv_url = RUTVIE._extract_url(webpage)
1188         if rutv_url:
1189             return self.url_result(rutv_url, 'RUTV')
1190
1191         # Look for embedded TED player
1192         mobj = re.search(
1193             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1194         if mobj is not None:
1195             return self.url_result(mobj.group('url'), 'TED')
1196
1197         # Look for embedded Ustream videos
1198         mobj = re.search(
1199             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1200         if mobj is not None:
1201             return self.url_result(mobj.group('url'), 'Ustream')
1202
1203         # Look for embedded arte.tv player
1204         mobj = re.search(
1205             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1206             webpage)
1207         if mobj is not None:
1208             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1209
1210         # Look for embedded smotri.com player
1211         smotri_url = SmotriIE._extract_url(webpage)
1212         if smotri_url:
1213             return self.url_result(smotri_url, 'Smotri')
1214
1215         # Look for embeded soundcloud player
1216         mobj = re.search(
1217             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1218             webpage)
1219         if mobj is not None:
1220             url = unescapeHTML(mobj.group('url'))
1221             return self.url_result(url)
1222
1223         # Look for embedded vulture.com player
1224         mobj = re.search(
1225             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1226             webpage)
1227         if mobj is not None:
1228             url = unescapeHTML(mobj.group('url'))
1229             return self.url_result(url, ie='Vulture')
1230
1231         # Look for embedded mtvservices player
1232         mobj = re.search(
1233             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1234             webpage)
1235         if mobj is not None:
1236             url = unescapeHTML(mobj.group('url'))
1237             return self.url_result(url, ie='MTVServicesEmbedded')
1238
1239         # Look for embedded yahoo player
1240         mobj = re.search(
1241             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1242             webpage)
1243         if mobj is not None:
1244             return self.url_result(mobj.group('url'), 'Yahoo')
1245
1246         # Look for embedded sbs.com.au player
1247         mobj = re.search(
1248             r'''(?x)
1249             (?:
1250                 <meta\s+property="og:video"\s+content=|
1251                 <iframe[^>]+?src=
1252             )
1253             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1254             webpage)
1255         if mobj is not None:
1256             return self.url_result(mobj.group('url'), 'SBS')
1257
1258         # Look for embedded Cinchcast player
1259         mobj = re.search(
1260             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1261             webpage)
1262         if mobj is not None:
1263             return self.url_result(mobj.group('url'), 'Cinchcast')
1264
1265         mobj = re.search(
1266             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1267             webpage)
1268         if mobj is not None:
1269             return self.url_result(mobj.group('url'), 'MLB')
1270
1271         mobj = re.search(
1272             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1273             webpage)
1274         if mobj is not None:
1275             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1276
1277         mobj = re.search(
1278             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1279             webpage)
1280         if mobj is not None:
1281             return self.url_result(mobj.group('url'), 'Livestream')
1282
1283         # Look for Zapiks embed
1284         mobj = re.search(
1285             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1286         if mobj is not None:
1287             return self.url_result(mobj.group('url'), 'Zapiks')
1288
1289         # Look for Kaltura embeds
1290         mobj = re.search(
1291             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1292         if mobj is not None:
1293             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1294
1295         # Look for Eagle.Platform embeds
1296         mobj = re.search(
1297             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1298         if mobj is not None:
1299             return self.url_result(mobj.group('url'), 'EaglePlatform')
1300
1301         # Look for ClipYou (uses Eagle.Platform) embeds
1302         mobj = re.search(
1303             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1304         if mobj is not None:
1305             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1306
1307         # Look for Pladform embeds
1308         mobj = re.search(
1309             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1310         if mobj is not None:
1311             return self.url_result(mobj.group('url'), 'Pladform')
1312
1313         # Look for 5min embeds
1314         mobj = re.search(
1315             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1316         if mobj is not None:
1317             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1318
1319         # Look for Crooks and Liars embeds
1320         mobj = re.search(
1321             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1322         if mobj is not None:
1323             return self.url_result(mobj.group('url'))
1324
1325         # Look for NBC Sports VPlayer embeds
1326         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1327         if nbc_sports_url:
1328             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1329
1330         # Look for UDN embeds
1331         mobj = re.search(
1332             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1333         if mobj is not None:
1334             return self.url_result(
1335                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1336
1337         def check_video(vurl):
1338             if YoutubeIE.suitable(vurl):
1339                 return True
1340             vpath = compat_urlparse.urlparse(vurl).path
1341             vext = determine_ext(vpath)
1342             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1343
1344         def filter_video(urls):
1345             return list(filter(check_video, urls))
1346
1347         # Start with something easy: JW Player in SWFObject
1348         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1349         if not found:
1350             # Look for gorilla-vid style embedding
1351             found = filter_video(re.findall(r'''(?sx)
1352                 (?:
1353                     jw_plugins|
1354                     JWPlayerOptions|
1355                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1356                 )
1357                 .*?
1358                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1359         if not found:
1360             # Broaden the search a little bit
1361             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1362         if not found:
1363             # Broaden the findall a little bit: JWPlayer JS loader
1364             found = filter_video(re.findall(
1365                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1366         if not found:
1367             # Flow player
1368             found = filter_video(re.findall(r'''(?xs)
1369                 flowplayer\("[^"]+",\s*
1370                     \{[^}]+?\}\s*,
1371                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1372                         ["']?url["']?\s*:\s*["']([^"']+)["']
1373             ''', webpage))
1374         if not found:
1375             # Cinerama player
1376             found = re.findall(
1377                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1378         if not found:
1379             # Try to find twitter cards info
1380             found = filter_video(re.findall(
1381                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1382         if not found:
1383             # We look for Open Graph info:
1384             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1385             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1386             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1387             if m_video_type is not None:
1388                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1389         if not found:
1390             # HTML5 video
1391             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1392         if not found:
1393             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1394             found = re.search(
1395                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1396                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1397                 webpage)
1398             if not found:
1399                 # Look also in Refresh HTTP header
1400                 refresh_header = head_response.headers.get('Refresh')
1401                 if refresh_header:
1402                     found = re.search(REDIRECT_REGEX, refresh_header)
1403             if found:
1404                 new_url = found.group(1)
1405                 self.report_following_redirect(new_url)
1406                 return {
1407                     '_type': 'url',
1408                     'url': new_url,
1409                 }
1410         if not found:
1411             raise UnsupportedError(url)
1412
1413         entries = []
1414         for video_url in found:
1415             video_url = compat_urlparse.urljoin(url, video_url)
1416             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1417
1418             # Sometimes, jwplayer extraction will result in a YouTube URL
1419             if YoutubeIE.suitable(video_url):
1420                 entries.append(self.url_result(video_url, 'Youtube'))
1421                 continue
1422
1423             # here's a fun little line of code for you:
1424             video_id = os.path.splitext(video_id)[0]
1425
1426             entries.append({
1427                 'id': video_id,
1428                 'url': video_url,
1429                 'uploader': video_uploader,
1430                 'title': video_title,
1431                 'age_limit': age_limit,
1432             })
1433
1434         if len(entries) == 1:
1435             return entries[0]
1436         else:
1437             for num, e in enumerate(entries, start=1):
1438                 # 'url' results don't have a title
1439                 if e.get('title') is not None:
1440                     e['title'] = '%s (%d)' % (e['title'], num)
1441             return {
1442                 '_type': 'playlist',
1443                 'entries': entries,
1444             }