[mlb] Fix extraction of articles
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urlparse,
13     compat_xml_parse_error,
14 )
15 from ..utils import (
16     determine_ext,
17     ExtractorError,
18     float_or_none,
19     HEADRequest,
20     is_html,
21     orderedSet,
22     parse_xml,
23     smuggle_url,
24     unescapeHTML,
25     unified_strdate,
26     unsmuggle_url,
27     UnsupportedError,
28     url_basename,
29     xpath_text,
30 )
31 from .brightcove import BrightcoveIE
32 from .nbc import NBCSportsVPlayerIE
33 from .ooyala import OoyalaIE
34 from .rutv import RUTVIE
35 from .smotri import SmotriIE
36 from .condenast import CondeNastIE
37 from .udn import UDNEmbedIE
38 from .senateisvp import SenateISVPIE
39 from .bliptv import BlipTVIE
40 from .svt import SVTIE
41
42
43 class GenericIE(InfoExtractor):
44     IE_DESC = 'Generic downloader that works on some sites'
45     _VALID_URL = r'.*'
46     IE_NAME = 'generic'
47     _TESTS = [
48         {
49             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
50             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
51             'info_dict': {
52                 'id': '13601338388002',
53                 'ext': 'mp4',
54                 'uploader': 'www.hodiho.fr',
55                 'title': 'R\u00e9gis plante sa Jeep',
56             }
57         },
58         # bandcamp page with custom domain
59         {
60             'add_ie': ['Bandcamp'],
61             'url': 'http://bronyrock.com/track/the-pony-mash',
62             'info_dict': {
63                 'id': '3235767654',
64                 'ext': 'mp3',
65                 'title': 'The Pony Mash',
66                 'uploader': 'M_Pallante',
67             },
68             'skip': 'There is a limit of 200 free downloads / month for the test song',
69         },
70         # embedded brightcove video
71         # it also tests brightcove videos that need to set the 'Referer' in the
72         # http requests
73         {
74             'add_ie': ['Brightcove'],
75             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
76             'info_dict': {
77                 'id': '2765128793001',
78                 'ext': 'mp4',
79                 'title': 'Le cours de bourse : l’analyse technique',
80                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
81                 'uploader': 'BFM BUSINESS',
82             },
83             'params': {
84                 'skip_download': True,
85             },
86         },
87         {
88             # https://github.com/rg3/youtube-dl/issues/2253
89             'url': 'http://bcove.me/i6nfkrc3',
90             'md5': '0ba9446db037002366bab3b3eb30c88c',
91             'info_dict': {
92                 'id': '3101154703001',
93                 'ext': 'mp4',
94                 'title': 'Still no power',
95                 'uploader': 'thestar.com',
96                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
97             },
98             'add_ie': ['Brightcove'],
99         },
100         {
101             'url': 'http://www.championat.com/video/football/v/87/87499.html',
102             'md5': 'fb973ecf6e4a78a67453647444222983',
103             'info_dict': {
104                 'id': '3414141473001',
105                 'ext': 'mp4',
106                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
107                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
108                 'uploader': 'Championat',
109             },
110         },
111         {
112             # https://github.com/rg3/youtube-dl/issues/3541
113             'add_ie': ['Brightcove'],
114             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
115             'info_dict': {
116                 'id': '3866516442001',
117                 'ext': 'mp4',
118                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
119                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
120                 'uploader': 'SBS Broadcasting',
121             },
122             'skip': 'Restricted to Netherlands',
123             'params': {
124                 'skip_download': True,  # m3u8 download
125             },
126         },
127         # Direct link to a video
128         {
129             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
130             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
131             'info_dict': {
132                 'id': 'trailer',
133                 'ext': 'mp4',
134                 'title': 'trailer',
135                 'upload_date': '20100513',
136             }
137         },
138         # ooyala video
139         {
140             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
141             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
142             'info_dict': {
143                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
144                 'ext': 'mp4',
145                 'title': '2cc213299525360.mov',  # that's what we get
146             },
147             'add_ie': ['Ooyala'],
148         },
149         # multiple ooyala embeds on SBN network websites
150         {
151             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
152             'info_dict': {
153                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
154                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
155             },
156             'playlist_mincount': 3,
157             'params': {
158                 'skip_download': True,
159             },
160             'add_ie': ['Ooyala'],
161         },
162         # google redirect
163         {
164             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
165             'info_dict': {
166                 'id': 'cmQHVoWB5FY',
167                 'ext': 'mp4',
168                 'upload_date': '20130224',
169                 'uploader_id': 'TheVerge',
170                 'description': 're:^Chris Ziegler takes a look at the\.*',
171                 'uploader': 'The Verge',
172                 'title': 'First Firefox OS phones side-by-side',
173             },
174             'params': {
175                 'skip_download': False,
176             }
177         },
178         # embed.ly video
179         {
180             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
181             'info_dict': {
182                 'id': '9ODmcdjQcHQ',
183                 'ext': 'mp4',
184                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
185                 'upload_date': '20140225',
186                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
187                 'uploader': 'Tested',
188                 'uploader_id': 'testedcom',
189             },
190             # No need to test YoutubeIE here
191             'params': {
192                 'skip_download': True,
193             },
194         },
195         # funnyordie embed
196         {
197             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
198             'info_dict': {
199                 'id': '18e820ec3f',
200                 'ext': 'mp4',
201                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
202                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
203             },
204         },
205         # BBC iPlayer embeds
206         {
207             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
208             'info_dict': {
209                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
210             },
211             'playlist_mincount': 18,
212         },
213         # RUTV embed
214         {
215             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
216             'info_dict': {
217                 'id': '776940',
218                 'ext': 'mp4',
219                 'title': 'Охотское море стало целиком российским',
220                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
221             },
222             'params': {
223                 # m3u8 download
224                 'skip_download': True,
225             },
226         },
227         # Embedded TED video
228         {
229             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
230             'md5': '65fdff94098e4a607385a60c5177c638',
231             'info_dict': {
232                 'id': '1969',
233                 'ext': 'mp4',
234                 'title': 'Hidden miracles of the natural world',
235                 'uploader': 'Louie Schwartzberg',
236                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
237             }
238         },
239         # Embedded Ustream video
240         {
241             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
242             'md5': '27b99cdb639c9b12a79bca876a073417',
243             'info_dict': {
244                 'id': '45734260',
245                 'ext': 'flv',
246                 'uploader': 'AU SPA:  The NSA and Privacy',
247                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
248             }
249         },
250         # nowvideo embed hidden behind percent encoding
251         {
252             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
253             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
254             'info_dict': {
255                 'id': '06e53103ca9aa',
256                 'ext': 'flv',
257                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
258                 'description': 'No description',
259             },
260         },
261         # arte embed
262         {
263             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
264             'md5': '7653032cbb25bf6c80d80f217055fa43',
265             'info_dict': {
266                 'id': '048195-004_PLUS7-F',
267                 'ext': 'flv',
268                 'title': 'X:enius',
269                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
270                 'upload_date': '20140320',
271             },
272             'params': {
273                 'skip_download': 'Requires rtmpdump'
274             }
275         },
276         # Condé Nast embed
277         {
278             'url': 'http://www.wired.com/2014/04/honda-asimo/',
279             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
280             'info_dict': {
281                 'id': '53501be369702d3275860000',
282                 'ext': 'mp4',
283                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
284             }
285         },
286         # Dailymotion embed
287         {
288             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
289             'md5': '441aeeb82eb72c422c7f14ec533999cd',
290             'info_dict': {
291                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
292                 'ext': 'mp4',
293                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
294                 'uploader': 'Spi0n',
295             },
296             'add_ie': ['Dailymotion'],
297         },
298         # YouTube embed
299         {
300             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
301             'info_dict': {
302                 'id': 'FXRb4ykk4S0',
303                 'ext': 'mp4',
304                 'title': 'The NBL Auction 2014',
305                 'uploader': 'BADMINTON England',
306                 'uploader_id': 'BADMINTONEvents',
307                 'upload_date': '20140603',
308                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
309             },
310             'add_ie': ['Youtube'],
311             'params': {
312                 'skip_download': True,
313             }
314         },
315         # MTVServices embed
316         {
317             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
318             'md5': '35727f82f58c76d996fc188f9755b0d5',
319             'info_dict': {
320                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
321                 'ext': 'mp4',
322                 'title': 'Review',
323                 'description': 'Mario\'s life in the fast lane has never looked so good.',
324             },
325         },
326         # YouTube embed via <data-embed-url="">
327         {
328             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
329             'info_dict': {
330                 'id': '4vAffPZIT44',
331                 'ext': 'mp4',
332                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
333                 'uploader': 'Gameloft',
334                 'uploader_id': 'gameloft',
335                 'upload_date': '20140828',
336                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
337             },
338             'params': {
339                 'skip_download': True,
340             }
341         },
342         # Camtasia studio
343         {
344             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
345             'playlist': [{
346                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
347                 'info_dict': {
348                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
349                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
350                     'ext': 'flv',
351                     'duration': 2235.90,
352                 }
353             }, {
354                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
355                 'info_dict': {
356                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
357                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
358                     'ext': 'flv',
359                     'duration': 2235.93,
360                 }
361             }],
362             'info_dict': {
363                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
364             }
365         },
366         # Flowplayer
367         {
368             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
369             'md5': '9d65602bf31c6e20014319c7d07fba27',
370             'info_dict': {
371                 'id': '5123ea6d5e5a7',
372                 'ext': 'mp4',
373                 'age_limit': 18,
374                 'uploader': 'www.handjobhub.com',
375                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
376             }
377         },
378         # RSS feed
379         {
380             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
381             'info_dict': {
382                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
383                 'title': 'Zero Punctuation',
384                 'description': 're:.*groundbreaking video review series.*'
385             },
386             'playlist_mincount': 11,
387         },
388         # Multiple brightcove videos
389         # https://github.com/rg3/youtube-dl/issues/2283
390         {
391             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
392             'info_dict': {
393                 'id': 'always-never',
394                 'title': 'Always / Never - The New Yorker',
395             },
396             'playlist_count': 3,
397             'params': {
398                 'extract_flat': False,
399                 'skip_download': True,
400             }
401         },
402         # MLB embed
403         {
404             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
405             'md5': '96f09a37e44da40dd083e12d9a683327',
406             'info_dict': {
407                 'id': '33322633',
408                 'ext': 'mp4',
409                 'title': 'Ump changes call to ball',
410                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
411                 'duration': 48,
412                 'timestamp': 1401537900,
413                 'upload_date': '20140531',
414                 'thumbnail': 're:^https?://.*\.jpg$',
415             },
416         },
417         # Wistia embed
418         {
419             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
420             'md5': '8788b683c777a5cf25621eaf286d0c23',
421             'info_dict': {
422                 'id': '1cfaf6b7ea',
423                 'ext': 'mov',
424                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
425                 'duration': 643.0,
426                 'filesize': 182808282,
427                 'uploader': 'education-portal.com',
428             },
429         },
430         {
431             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
432             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
433             'info_dict': {
434                 'id': 'uxjb0lwrcz',
435                 'ext': 'mp4',
436                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
437                 'duration': 1715.0,
438                 'uploader': 'thoughtworks.wistia.com',
439             },
440         },
441         # Direct download with broken HEAD
442         {
443             'url': 'http://ai-radio.org:8000/radio.opus',
444             'info_dict': {
445                 'id': 'radio',
446                 'ext': 'opus',
447                 'title': 'radio',
448             },
449             'params': {
450                 'skip_download': True,  # infinite live stream
451             },
452             'expected_warnings': [
453                 r'501.*Not Implemented'
454             ],
455         },
456         # Soundcloud embed
457         {
458             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
459             'info_dict': {
460                 'id': '174391317',
461                 'ext': 'mp3',
462                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
463                 'uploader': 'Sophos Security',
464                 'title': 'Chet Chat 171 - Oct 29, 2014',
465                 'upload_date': '20141029',
466             }
467         },
468         # Livestream embed
469         {
470             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
471             'info_dict': {
472                 'id': '67864563',
473                 'ext': 'flv',
474                 'upload_date': '20141112',
475                 'title': 'Rosetta #CometLanding webcast HL 10',
476             }
477         },
478         # LazyYT
479         {
480             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
481             'info_dict': {
482                 'id': '1986',
483                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
484             },
485             'playlist_mincount': 2,
486         },
487         # Direct link with incorrect MIME type
488         {
489             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
490             'md5': '4ccbebe5f36706d85221f204d7eb5913',
491             'info_dict': {
492                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
493                 'id': '5_Lennart_Poettering_-_Systemd',
494                 'ext': 'webm',
495                 'title': '5_Lennart_Poettering_-_Systemd',
496                 'upload_date': '20141120',
497             },
498             'expected_warnings': [
499                 'URL could be a direct video link, returning it as such.'
500             ]
501         },
502         # Cinchcast embed
503         {
504             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
505             'info_dict': {
506                 'id': '7141703',
507                 'ext': 'mp3',
508                 'upload_date': '20141126',
509                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
510             }
511         },
512         # Cinerama player
513         {
514             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
515             'info_dict': {
516                 'id': '730m_DandD_1901_512k',
517                 'ext': 'mp4',
518                 'uploader': 'www.abc.net.au',
519                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
520             }
521         },
522         # embedded viddler video
523         {
524             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
525             'info_dict': {
526                 'id': '4d03aad9',
527                 'ext': 'mp4',
528                 'uploader': 'deadspin',
529                 'title': 'WALL-TO-GORTAT',
530                 'timestamp': 1422285291,
531                 'upload_date': '20150126',
532             },
533             'add_ie': ['Viddler'],
534         },
535         # Libsyn embed
536         {
537             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
538             'info_dict': {
539                 'id': '3377616',
540                 'ext': 'mp3',
541                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
542                 'description': 'md5:601cb790edd05908957dae8aaa866465',
543                 'upload_date': '20150220',
544             },
545         },
546         # jwplayer YouTube
547         {
548             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
549             'info_dict': {
550                 'id': 'Mrj4DVp2zeA',
551                 'ext': 'mp4',
552                 'upload_date': '20150212',
553                 'uploader': 'The National Archives UK',
554                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
555                 'uploader_id': 'NationalArchives08',
556                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
557             },
558         },
559         # rtl.nl embed
560         {
561             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
562             'playlist_mincount': 5,
563             'info_dict': {
564                 'id': 'aanslagen-kopenhagen',
565                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
566             }
567         },
568         # Zapiks embed
569         {
570             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
571             'info_dict': {
572                 'id': '118046',
573                 'ext': 'mp4',
574                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
575             }
576         },
577         # Kaltura embed
578         {
579             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
580             'info_dict': {
581                 'id': '1_eergr3h1',
582                 'ext': 'mp4',
583                 'upload_date': '20150226',
584                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
585                 'timestamp': int,
586                 'title': 'John Carlson Postgame 2/25/15',
587             },
588         },
589         # Eagle.Platform embed (generic URL)
590         {
591             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
592             'info_dict': {
593                 'id': '227304',
594                 'ext': 'mp4',
595                 'title': 'Навальный вышел на свободу',
596                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
597                 'thumbnail': 're:^https?://.*\.jpg$',
598                 'duration': 87,
599                 'view_count': int,
600                 'age_limit': 0,
601             },
602         },
603         # ClipYou (Eagle.Platform) embed (custom URL)
604         {
605             'url': 'http://muz-tv.ru/play/7129/',
606             'info_dict': {
607                 'id': '12820',
608                 'ext': 'mp4',
609                 'title': "'O Sole Mio",
610                 'thumbnail': 're:^https?://.*\.jpg$',
611                 'duration': 216,
612                 'view_count': int,
613             },
614         },
615         # Pladform embed
616         {
617             'url': 'http://muz-tv.ru/kinozal/view/7400/',
618             'info_dict': {
619                 'id': '100183293',
620                 'ext': 'mp4',
621                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
622                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
623                 'thumbnail': 're:^https?://.*\.jpg$',
624                 'duration': 694,
625                 'age_limit': 0,
626             },
627         },
628         # Playwire embed
629         {
630             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
631             'info_dict': {
632                 'id': '3519514',
633                 'ext': 'mp4',
634                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
635                 'thumbnail': 're:^https?://.*\.png$',
636                 'duration': 45.115,
637             },
638         },
639         # 5min embed
640         {
641             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
642             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
643             'info_dict': {
644                 'id': '518726732',
645                 'ext': 'mp4',
646                 'title': 'Facebook Creates "On This Day" | Crunch Report',
647             },
648         },
649         # SVT embed
650         {
651             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
652             'info_dict': {
653                 'id': '2900353',
654                 'ext': 'flv',
655                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
656                 'duration': 27,
657                 'age_limit': 0,
658             },
659         },
660         # RSS feed with enclosure
661         {
662             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
663             'info_dict': {
664                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
665                 'ext': 'm4v',
666                 'upload_date': '20150228',
667                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
668             }
669         },
670         # Crooks and Liars embed
671         {
672             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
673             'info_dict': {
674                 'id': '8RUoRhRi',
675                 'ext': 'mp4',
676                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
677                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
678                 'timestamp': 1428207000,
679                 'upload_date': '20150405',
680                 'uploader': 'Heather',
681             },
682         },
683         # Crooks and Liars external embed
684         {
685             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
686             'info_dict': {
687                 'id': 'MTE3MjUtMzQ2MzA',
688                 'ext': 'mp4',
689                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
690                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
691                 'timestamp': 1265032391,
692                 'upload_date': '20100201',
693                 'uploader': 'Heather',
694             },
695         },
696         # NBC Sports vplayer embed
697         {
698             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
699             'info_dict': {
700                 'id': 'ln7x1qSThw4k',
701                 'ext': 'flv',
702                 'title': "PFT Live: New leader in the 'new-look' defense",
703                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
704             },
705         },
706         # UDN embed
707         {
708             'url': 'http://www.udn.com/news/story/7314/822787',
709             'md5': 'fd2060e988c326991037b9aff9df21a6',
710             'info_dict': {
711                 'id': '300346',
712                 'ext': 'mp4',
713                 'title': '中一中男師變性 全校師生力挺',
714                 'thumbnail': 're:^https?://.*\.jpg$',
715             }
716         },
717         # Ooyala embed
718         {
719             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
720             'info_dict': {
721                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
722                 'ext': 'mp4',
723                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
724                 'title': 'This is what separates the Excel masters from the wannabes',
725             },
726             'params': {
727                 # m3u8 downloads
728                 'skip_download': True,
729             }
730         },
731         # Contains a SMIL manifest
732         {
733             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
734             'info_dict': {
735                 'id': 'file',
736                 'ext': 'flv',
737                 'title': '+ Football: Lottery Champions League Europe',
738                 'uploader': 'www.telewebion.com',
739             },
740             'params': {
741                 # rtmpe downloads
742                 'skip_download': True,
743             }
744         }
745     ]
746
747     def report_following_redirect(self, new_url):
748         """Report information extraction."""
749         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
750
751     def _extract_rss(self, url, video_id, doc):
752         playlist_title = doc.find('./channel/title').text
753         playlist_desc_el = doc.find('./channel/description')
754         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
755
756         entries = []
757         for it in doc.findall('./channel/item'):
758             next_url = xpath_text(it, 'link', fatal=False)
759             if not next_url:
760                 enclosure_nodes = it.findall('./enclosure')
761                 for e in enclosure_nodes:
762                     next_url = e.attrib.get('url')
763                     if next_url:
764                         break
765
766             if not next_url:
767                 continue
768
769             entries.append({
770                 '_type': 'url',
771                 'url': next_url,
772                 'title': it.find('title').text,
773             })
774
775         return {
776             '_type': 'playlist',
777             'id': url,
778             'title': playlist_title,
779             'description': playlist_desc,
780             'entries': entries,
781         }
782
783     def _extract_camtasia(self, url, video_id, webpage):
784         """ Returns None if no camtasia video can be found. """
785
786         camtasia_cfg = self._search_regex(
787             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
788             webpage, 'camtasia configuration file', default=None)
789         if camtasia_cfg is None:
790             return None
791
792         title = self._html_search_meta('DC.title', webpage, fatal=True)
793
794         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
795         camtasia_cfg = self._download_xml(
796             camtasia_url, video_id,
797             note='Downloading camtasia configuration',
798             errnote='Failed to download camtasia configuration')
799         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
800
801         entries = []
802         for n in fileset_node.getchildren():
803             url_n = n.find('./uri')
804             if url_n is None:
805                 continue
806
807             entries.append({
808                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
809                 'title': '%s - %s' % (title, n.tag),
810                 'url': compat_urlparse.urljoin(url, url_n.text),
811                 'duration': float_or_none(n.find('./duration').text),
812             })
813
814         return {
815             '_type': 'playlist',
816             'entries': entries,
817             'title': title,
818         }
819
820     def _real_extract(self, url):
821         if url.startswith('//'):
822             return {
823                 '_type': 'url',
824                 'url': self.http_scheme() + url,
825             }
826
827         parsed_url = compat_urlparse.urlparse(url)
828         if not parsed_url.scheme:
829             default_search = self._downloader.params.get('default_search')
830             if default_search is None:
831                 default_search = 'fixup_error'
832
833             if default_search in ('auto', 'auto_warning', 'fixup_error'):
834                 if '/' in url:
835                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
836                     return self.url_result('http://' + url)
837                 elif default_search != 'fixup_error':
838                     if default_search == 'auto_warning':
839                         if re.match(r'^(?:url|URL)$', url):
840                             raise ExtractorError(
841                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
842                                 expected=True)
843                         else:
844                             self._downloader.report_warning(
845                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
846                     return self.url_result('ytsearch:' + url)
847
848             if default_search in ('error', 'fixup_error'):
849                 raise ExtractorError(
850                     '%r is not a valid URL. '
851                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
852                     % (url, url), expected=True)
853             else:
854                 if ':' not in default_search:
855                     default_search += ':'
856                 return self.url_result(default_search + url)
857
858         url, smuggled_data = unsmuggle_url(url)
859         force_videoid = None
860         is_intentional = smuggled_data and smuggled_data.get('to_generic')
861         if smuggled_data and 'force_videoid' in smuggled_data:
862             force_videoid = smuggled_data['force_videoid']
863             video_id = force_videoid
864         else:
865             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
866
867         self.to_screen('%s: Requesting header' % video_id)
868
869         head_req = HEADRequest(url)
870         head_response = self._request_webpage(
871             head_req, video_id,
872             note=False, errnote='Could not send HEAD request to %s' % url,
873             fatal=False)
874
875         if head_response is not False:
876             # Check for redirect
877             new_url = head_response.geturl()
878             if url != new_url:
879                 self.report_following_redirect(new_url)
880                 if force_videoid:
881                     new_url = smuggle_url(
882                         new_url, {'force_videoid': force_videoid})
883                 return self.url_result(new_url)
884
885         full_response = None
886         if head_response is False:
887             full_response = self._request_webpage(url, video_id)
888             head_response = full_response
889
890         # Check for direct link to a video
891         content_type = head_response.headers.get('Content-Type', '')
892         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
893         if m:
894             upload_date = unified_strdate(
895                 head_response.headers.get('Last-Modified'))
896             return {
897                 'id': video_id,
898                 'title': os.path.splitext(url_basename(url))[0],
899                 'direct': True,
900                 'formats': [{
901                     'format_id': m.group('format_id'),
902                     'url': url,
903                     'vcodec': 'none' if m.group('type') == 'audio' else None
904                 }],
905                 'upload_date': upload_date,
906             }
907
908         if not self._downloader.params.get('test', False) and not is_intentional:
909             self._downloader.report_warning('Falling back on generic information extractor.')
910
911         if not full_response:
912             full_response = self._request_webpage(url, video_id)
913
914         # Maybe it's a direct link to a video?
915         # Be careful not to download the whole thing!
916         first_bytes = full_response.read(512)
917         if not is_html(first_bytes):
918             self._downloader.report_warning(
919                 'URL could be a direct video link, returning it as such.')
920             upload_date = unified_strdate(
921                 head_response.headers.get('Last-Modified'))
922             return {
923                 'id': video_id,
924                 'title': os.path.splitext(url_basename(url))[0],
925                 'direct': True,
926                 'url': url,
927                 'upload_date': upload_date,
928             }
929
930         webpage = self._webpage_read_content(
931             full_response, url, video_id, prefix=first_bytes)
932
933         self.report_extraction(video_id)
934
935         # Is it an RSS feed?
936         try:
937             doc = parse_xml(webpage)
938             if doc.tag == 'rss':
939                 return self._extract_rss(url, video_id, doc)
940         except compat_xml_parse_error:
941             pass
942
943         # Is it a Camtasia project?
944         camtasia_res = self._extract_camtasia(url, video_id, webpage)
945         if camtasia_res is not None:
946             return camtasia_res
947
948         # Sometimes embedded video player is hidden behind percent encoding
949         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
950         # Unescaping the whole page allows to handle those cases in a generic way
951         webpage = compat_urllib_parse.unquote(webpage)
952
953         # it's tempting to parse this further, but you would
954         # have to take into account all the variations like
955         #   Video Title - Site Name
956         #   Site Name | Video Title
957         #   Video Title - Tagline | Site Name
958         # and so on and so forth; it's just not practical
959         video_title = self._html_search_regex(
960             r'(?s)<title>(.*?)</title>', webpage, 'video title',
961             default='video')
962
963         # Try to detect age limit automatically
964         age_limit = self._rta_search(webpage)
965         # And then there are the jokers who advertise that they use RTA,
966         # but actually don't.
967         AGE_LIMIT_MARKERS = [
968             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
969         ]
970         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
971             age_limit = 18
972
973         # video uploader is domain name
974         video_uploader = self._search_regex(
975             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
976
977         # Helper method
978         def _playlist_from_matches(matches, getter=None, ie=None):
979             urlrs = orderedSet(
980                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
981                 for m in matches)
982             return self.playlist_result(
983                 urlrs, playlist_id=video_id, playlist_title=video_title)
984
985         # Look for BrightCove:
986         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
987         if bc_urls:
988             self.to_screen('Brightcove video detected.')
989             entries = [{
990                 '_type': 'url',
991                 'url': smuggle_url(bc_url, {'Referer': url}),
992                 'ie_key': 'Brightcove'
993             } for bc_url in bc_urls]
994
995             return {
996                 '_type': 'playlist',
997                 'title': video_title,
998                 'id': video_id,
999                 'entries': entries,
1000             }
1001
1002         # Look for embedded rtl.nl player
1003         matches = re.findall(
1004             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1005             webpage)
1006         if matches:
1007             return _playlist_from_matches(matches, ie='RtlNl')
1008
1009         # Look for embedded (iframe) Vimeo player
1010         mobj = re.search(
1011             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1012         if mobj:
1013             player_url = unescapeHTML(mobj.group('url'))
1014             surl = smuggle_url(player_url, {'Referer': url})
1015             return self.url_result(surl)
1016         # Look for embedded (swf embed) Vimeo player
1017         mobj = re.search(
1018             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1019         if mobj:
1020             return self.url_result(mobj.group(1))
1021
1022         # Look for embedded YouTube player
1023         matches = re.findall(r'''(?x)
1024             (?:
1025                 <iframe[^>]+?src=|
1026                 data-video-url=|
1027                 <embed[^>]+?src=|
1028                 embedSWF\(?:\s*|
1029                 new\s+SWFObject\(
1030             )
1031             (["\'])
1032                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1033                 (?:embed|v|p)/.+?)
1034             \1''', webpage)
1035         if matches:
1036             return _playlist_from_matches(
1037                 matches, lambda m: unescapeHTML(m[1]))
1038
1039         # Look for lazyYT YouTube embed
1040         matches = re.findall(
1041             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1042         if matches:
1043             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1044
1045         # Look for embedded Dailymotion player
1046         matches = re.findall(
1047             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1048         if matches:
1049             return _playlist_from_matches(
1050                 matches, lambda m: unescapeHTML(m[1]))
1051
1052         # Look for embedded Dailymotion playlist player (#3822)
1053         m = re.search(
1054             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1055         if m:
1056             playlists = re.findall(
1057                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1058             if playlists:
1059                 return _playlist_from_matches(
1060                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1061
1062         # Look for embedded Wistia player
1063         match = re.search(
1064             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1065         if match:
1066             embed_url = self._proto_relative_url(
1067                 unescapeHTML(match.group('url')))
1068             return {
1069                 '_type': 'url_transparent',
1070                 'url': embed_url,
1071                 'ie_key': 'Wistia',
1072                 'uploader': video_uploader,
1073                 'title': video_title,
1074                 'id': video_id,
1075             }
1076
1077         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1078         if match:
1079             return {
1080                 '_type': 'url_transparent',
1081                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1082                 'ie_key': 'Wistia',
1083                 'uploader': video_uploader,
1084                 'title': video_title,
1085                 'id': match.group('id')
1086             }
1087
1088         # Look for embedded blip.tv player
1089         bliptv_url = BlipTVIE._extract_url(webpage)
1090         if bliptv_url:
1091             return self.url_result(bliptv_url, 'BlipTV')
1092
1093         # Look for SVT player
1094         svt_url = SVTIE._extract_url(webpage)
1095         if svt_url:
1096             return self.url_result(svt_url, 'SVT')
1097
1098         # Look for embedded condenast player
1099         matches = re.findall(
1100             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1101             webpage)
1102         if matches:
1103             return {
1104                 '_type': 'playlist',
1105                 'entries': [{
1106                     '_type': 'url',
1107                     'ie_key': 'CondeNast',
1108                     'url': ma,
1109                 } for ma in matches],
1110                 'title': video_title,
1111                 'id': video_id,
1112             }
1113
1114         # Look for Bandcamp pages with custom domain
1115         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1116         if mobj is not None:
1117             burl = unescapeHTML(mobj.group(1))
1118             # Don't set the extractor because it can be a track url or an album
1119             return self.url_result(burl)
1120
1121         # Look for embedded Vevo player
1122         mobj = re.search(
1123             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1124         if mobj is not None:
1125             return self.url_result(mobj.group('url'))
1126
1127         # Look for embedded Viddler player
1128         mobj = re.search(
1129             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1130             webpage)
1131         if mobj is not None:
1132             return self.url_result(mobj.group('url'))
1133
1134         # Look for NYTimes player
1135         mobj = re.search(
1136             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1137             webpage)
1138         if mobj is not None:
1139             return self.url_result(mobj.group('url'))
1140
1141         # Look for Libsyn player
1142         mobj = re.search(
1143             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1144         if mobj is not None:
1145             return self.url_result(mobj.group('url'))
1146
1147         # Look for Ooyala videos
1148         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1149                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1150                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1151                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1152         if mobj is not None:
1153             return OoyalaIE._build_url_result(mobj.group('ec'))
1154
1155         # Look for multiple Ooyala embeds on SBN network websites
1156         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1157         if mobj is not None:
1158             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1159             if embeds:
1160                 return _playlist_from_matches(
1161                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1162
1163         # Look for Aparat videos
1164         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1165         if mobj is not None:
1166             return self.url_result(mobj.group(1), 'Aparat')
1167
1168         # Look for MPORA videos
1169         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1170         if mobj is not None:
1171             return self.url_result(mobj.group(1), 'Mpora')
1172
1173         # Look for embedded NovaMov-based player
1174         mobj = re.search(
1175             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1176                     (?P<url>http://(?:(?:embed|www)\.)?
1177                         (?:novamov\.com|
1178                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1179                            videoweed\.(?:es|com)|
1180                            movshare\.(?:net|sx|ag)|
1181                            divxstage\.(?:eu|net|ch|co|at|ag))
1182                         /embed\.php.+?)\1''', webpage)
1183         if mobj is not None:
1184             return self.url_result(mobj.group('url'))
1185
1186         # Look for embedded Facebook player
1187         mobj = re.search(
1188             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1189         if mobj is not None:
1190             return self.url_result(mobj.group('url'), 'Facebook')
1191
1192         # Look for embedded VK player
1193         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1194         if mobj is not None:
1195             return self.url_result(mobj.group('url'), 'VK')
1196
1197         # Look for embedded ivi player
1198         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1199         if mobj is not None:
1200             return self.url_result(mobj.group('url'), 'Ivi')
1201
1202         # Look for embedded Huffington Post player
1203         mobj = re.search(
1204             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1205         if mobj is not None:
1206             return self.url_result(mobj.group('url'), 'HuffPost')
1207
1208         # Look for embed.ly
1209         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1210         if mobj is not None:
1211             return self.url_result(mobj.group('url'))
1212         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1213         if mobj is not None:
1214             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1215
1216         # Look for funnyordie embed
1217         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1218         if matches:
1219             return _playlist_from_matches(
1220                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1221
1222         # Look for BBC iPlayer embed
1223         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1224         if matches:
1225             return _playlist_from_matches(matches, ie='BBCCoUk')
1226
1227         # Look for embedded RUTV player
1228         rutv_url = RUTVIE._extract_url(webpage)
1229         if rutv_url:
1230             return self.url_result(rutv_url, 'RUTV')
1231
1232         # Look for embedded TED player
1233         mobj = re.search(
1234             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1235         if mobj is not None:
1236             return self.url_result(mobj.group('url'), 'TED')
1237
1238         # Look for embedded Ustream videos
1239         mobj = re.search(
1240             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1241         if mobj is not None:
1242             return self.url_result(mobj.group('url'), 'Ustream')
1243
1244         # Look for embedded arte.tv player
1245         mobj = re.search(
1246             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1247             webpage)
1248         if mobj is not None:
1249             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1250
1251         # Look for embedded smotri.com player
1252         smotri_url = SmotriIE._extract_url(webpage)
1253         if smotri_url:
1254             return self.url_result(smotri_url, 'Smotri')
1255
1256         # Look for embeded soundcloud player
1257         mobj = re.search(
1258             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1259             webpage)
1260         if mobj is not None:
1261             url = unescapeHTML(mobj.group('url'))
1262             return self.url_result(url)
1263
1264         # Look for embedded vulture.com player
1265         mobj = re.search(
1266             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1267             webpage)
1268         if mobj is not None:
1269             url = unescapeHTML(mobj.group('url'))
1270             return self.url_result(url, ie='Vulture')
1271
1272         # Look for embedded mtvservices player
1273         mobj = re.search(
1274             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1275             webpage)
1276         if mobj is not None:
1277             url = unescapeHTML(mobj.group('url'))
1278             return self.url_result(url, ie='MTVServicesEmbedded')
1279
1280         # Look for embedded yahoo player
1281         mobj = re.search(
1282             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1283             webpage)
1284         if mobj is not None:
1285             return self.url_result(mobj.group('url'), 'Yahoo')
1286
1287         # Look for embedded sbs.com.au player
1288         mobj = re.search(
1289             r'''(?x)
1290             (?:
1291                 <meta\s+property="og:video"\s+content=|
1292                 <iframe[^>]+?src=
1293             )
1294             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1295             webpage)
1296         if mobj is not None:
1297             return self.url_result(mobj.group('url'), 'SBS')
1298
1299         # Look for embedded Cinchcast player
1300         mobj = re.search(
1301             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1302             webpage)
1303         if mobj is not None:
1304             return self.url_result(mobj.group('url'), 'Cinchcast')
1305
1306         mobj = re.search(
1307             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1308             webpage)
1309         if not mobj:
1310             mobj = re.search(
1311                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1312                 webpage)
1313         if mobj is not None:
1314             return self.url_result(mobj.group('url'), 'MLB')
1315
1316         mobj = re.search(
1317             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1318             webpage)
1319         if mobj is not None:
1320             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1321
1322         mobj = re.search(
1323             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1324             webpage)
1325         if mobj is not None:
1326             return self.url_result(mobj.group('url'), 'Livestream')
1327
1328         # Look for Zapiks embed
1329         mobj = re.search(
1330             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1331         if mobj is not None:
1332             return self.url_result(mobj.group('url'), 'Zapiks')
1333
1334         # Look for Kaltura embeds
1335         mobj = re.search(
1336             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1337         if mobj is not None:
1338             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1339
1340         # Look for Eagle.Platform embeds
1341         mobj = re.search(
1342             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1343         if mobj is not None:
1344             return self.url_result(mobj.group('url'), 'EaglePlatform')
1345
1346         # Look for ClipYou (uses Eagle.Platform) embeds
1347         mobj = re.search(
1348             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1349         if mobj is not None:
1350             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1351
1352         # Look for Pladform embeds
1353         mobj = re.search(
1354             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1355         if mobj is not None:
1356             return self.url_result(mobj.group('url'), 'Pladform')
1357
1358         # Look for Playwire embeds
1359         mobj = re.search(
1360             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1361         if mobj is not None:
1362             return self.url_result(mobj.group('url'))
1363
1364         # Look for 5min embeds
1365         mobj = re.search(
1366             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1367         if mobj is not None:
1368             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1369
1370         # Look for Crooks and Liars embeds
1371         mobj = re.search(
1372             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1373         if mobj is not None:
1374             return self.url_result(mobj.group('url'))
1375
1376         # Look for NBC Sports VPlayer embeds
1377         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1378         if nbc_sports_url:
1379             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1380
1381         # Look for UDN embeds
1382         mobj = re.search(
1383             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1384         if mobj is not None:
1385             return self.url_result(
1386                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1387
1388         # Look for Senate ISVP iframe
1389         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1390         if senate_isvp_url:
1391             return self.url_result(surl, 'SenateISVP')
1392
1393         def check_video(vurl):
1394             if YoutubeIE.suitable(vurl):
1395                 return True
1396             vpath = compat_urlparse.urlparse(vurl).path
1397             vext = determine_ext(vpath)
1398             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1399
1400         def filter_video(urls):
1401             return list(filter(check_video, urls))
1402
1403         # Start with something easy: JW Player in SWFObject
1404         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1405         if not found:
1406             # Look for gorilla-vid style embedding
1407             found = filter_video(re.findall(r'''(?sx)
1408                 (?:
1409                     jw_plugins|
1410                     JWPlayerOptions|
1411                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1412                 )
1413                 .*?
1414                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1415         if not found:
1416             # Broaden the search a little bit
1417             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1418         if not found:
1419             # Broaden the findall a little bit: JWPlayer JS loader
1420             found = filter_video(re.findall(
1421                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1422         if not found:
1423             # Flow player
1424             found = filter_video(re.findall(r'''(?xs)
1425                 flowplayer\("[^"]+",\s*
1426                     \{[^}]+?\}\s*,
1427                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1428                         ["']?url["']?\s*:\s*["']([^"']+)["']
1429             ''', webpage))
1430         if not found:
1431             # Cinerama player
1432             found = re.findall(
1433                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1434         if not found:
1435             # Try to find twitter cards info
1436             found = filter_video(re.findall(
1437                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1438         if not found:
1439             # We look for Open Graph info:
1440             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1441             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1442             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1443             if m_video_type is not None:
1444                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1445         if not found:
1446             # HTML5 video
1447             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1448         if not found:
1449             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1450             found = re.search(
1451                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1452                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1453                 webpage)
1454             if not found:
1455                 # Look also in Refresh HTTP header
1456                 refresh_header = head_response.headers.get('Refresh')
1457                 if refresh_header:
1458                     found = re.search(REDIRECT_REGEX, refresh_header)
1459             if found:
1460                 new_url = compat_urlparse.urljoin(url, found.group(1))
1461                 self.report_following_redirect(new_url)
1462                 return {
1463                     '_type': 'url',
1464                     'url': new_url,
1465                 }
1466         if not found:
1467             raise UnsupportedError(url)
1468
1469         entries = []
1470         for video_url in found:
1471             video_url = compat_urlparse.urljoin(url, video_url)
1472             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1473
1474             # Sometimes, jwplayer extraction will result in a YouTube URL
1475             if YoutubeIE.suitable(video_url):
1476                 entries.append(self.url_result(video_url, 'Youtube'))
1477                 continue
1478
1479             # here's a fun little line of code for you:
1480             video_id = os.path.splitext(video_id)[0]
1481
1482             if determine_ext(video_url) == 'smil':
1483                 entries.append({
1484                     'id': video_id,
1485                     'formats': self._extract_smil_formats(video_url, video_id),
1486                     'uploader': video_uploader,
1487                     'title': video_title,
1488                     'age_limit': age_limit,
1489                 })
1490             else:
1491                 entries.append({
1492                     'id': video_id,
1493                     'url': video_url,
1494                     'uploader': video_uploader,
1495                     'title': video_title,
1496                     'age_limit': age_limit,
1497                 })
1498
1499         if len(entries) == 1:
1500             return entries[0]
1501         else:
1502             for num, e in enumerate(entries, start=1):
1503                 # 'url' results don't have a title
1504                 if e.get('title') is not None:
1505                     e['title'] = '%s (%d)' % (e['title'], num)
1506             return {
1507                 '_type': 'playlist',
1508                 'entries': entries,
1509             }