[generic] Add support for sportbox embeds
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urlparse,
13     compat_xml_parse_error,
14 )
15 from ..utils import (
16     determine_ext,
17     ExtractorError,
18     float_or_none,
19     HEADRequest,
20     is_html,
21     orderedSet,
22     parse_xml,
23     smuggle_url,
24     unescapeHTML,
25     unified_strdate,
26     unsmuggle_url,
27     UnsupportedError,
28     url_basename,
29     xpath_text,
30 )
31 from .brightcove import BrightcoveIE
32 from .nbc import NBCSportsVPlayerIE
33 from .ooyala import OoyalaIE
34 from .rutv import RUTVIE
35 from .sportbox import SportBoxEmbedIE
36 from .smotri import SmotriIE
37 from .condenast import CondeNastIE
38 from .udn import UDNEmbedIE
39 from .senateisvp import SenateISVPIE
40 from .bliptv import BlipTVIE
41 from .svt import SVTIE
42
43
44 class GenericIE(InfoExtractor):
45     IE_DESC = 'Generic downloader that works on some sites'
46     _VALID_URL = r'.*'
47     IE_NAME = 'generic'
48     _TESTS = [
49         {
50             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
51             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
52             'info_dict': {
53                 'id': '13601338388002',
54                 'ext': 'mp4',
55                 'uploader': 'www.hodiho.fr',
56                 'title': 'R\u00e9gis plante sa Jeep',
57             }
58         },
59         # bandcamp page with custom domain
60         {
61             'add_ie': ['Bandcamp'],
62             'url': 'http://bronyrock.com/track/the-pony-mash',
63             'info_dict': {
64                 'id': '3235767654',
65                 'ext': 'mp3',
66                 'title': 'The Pony Mash',
67                 'uploader': 'M_Pallante',
68             },
69             'skip': 'There is a limit of 200 free downloads / month for the test song',
70         },
71         # embedded brightcove video
72         # it also tests brightcove videos that need to set the 'Referer' in the
73         # http requests
74         {
75             'add_ie': ['Brightcove'],
76             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
77             'info_dict': {
78                 'id': '2765128793001',
79                 'ext': 'mp4',
80                 'title': 'Le cours de bourse : l’analyse technique',
81                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
82                 'uploader': 'BFM BUSINESS',
83             },
84             'params': {
85                 'skip_download': True,
86             },
87         },
88         {
89             # https://github.com/rg3/youtube-dl/issues/2253
90             'url': 'http://bcove.me/i6nfkrc3',
91             'md5': '0ba9446db037002366bab3b3eb30c88c',
92             'info_dict': {
93                 'id': '3101154703001',
94                 'ext': 'mp4',
95                 'title': 'Still no power',
96                 'uploader': 'thestar.com',
97                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
98             },
99             'add_ie': ['Brightcove'],
100         },
101         {
102             'url': 'http://www.championat.com/video/football/v/87/87499.html',
103             'md5': 'fb973ecf6e4a78a67453647444222983',
104             'info_dict': {
105                 'id': '3414141473001',
106                 'ext': 'mp4',
107                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
108                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
109                 'uploader': 'Championat',
110             },
111         },
112         {
113             # https://github.com/rg3/youtube-dl/issues/3541
114             'add_ie': ['Brightcove'],
115             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
116             'info_dict': {
117                 'id': '3866516442001',
118                 'ext': 'mp4',
119                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
120                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
121                 'uploader': 'SBS Broadcasting',
122             },
123             'skip': 'Restricted to Netherlands',
124             'params': {
125                 'skip_download': True,  # m3u8 download
126             },
127         },
128         # Direct link to a video
129         {
130             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
131             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
132             'info_dict': {
133                 'id': 'trailer',
134                 'ext': 'mp4',
135                 'title': 'trailer',
136                 'upload_date': '20100513',
137             }
138         },
139         # ooyala video
140         {
141             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
142             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
143             'info_dict': {
144                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
145                 'ext': 'mp4',
146                 'title': '2cc213299525360.mov',  # that's what we get
147             },
148             'add_ie': ['Ooyala'],
149         },
150         # multiple ooyala embeds on SBN network websites
151         {
152             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
153             'info_dict': {
154                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
155                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
156             },
157             'playlist_mincount': 3,
158             'params': {
159                 'skip_download': True,
160             },
161             'add_ie': ['Ooyala'],
162         },
163         # google redirect
164         {
165             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
166             'info_dict': {
167                 'id': 'cmQHVoWB5FY',
168                 'ext': 'mp4',
169                 'upload_date': '20130224',
170                 'uploader_id': 'TheVerge',
171                 'description': 're:^Chris Ziegler takes a look at the\.*',
172                 'uploader': 'The Verge',
173                 'title': 'First Firefox OS phones side-by-side',
174             },
175             'params': {
176                 'skip_download': False,
177             }
178         },
179         # embed.ly video
180         {
181             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
182             'info_dict': {
183                 'id': '9ODmcdjQcHQ',
184                 'ext': 'mp4',
185                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
186                 'upload_date': '20140225',
187                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
188                 'uploader': 'Tested',
189                 'uploader_id': 'testedcom',
190             },
191             # No need to test YoutubeIE here
192             'params': {
193                 'skip_download': True,
194             },
195         },
196         # funnyordie embed
197         {
198             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
199             'info_dict': {
200                 'id': '18e820ec3f',
201                 'ext': 'mp4',
202                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
203                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
204             },
205         },
206         # BBC iPlayer embeds
207         {
208             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
209             'info_dict': {
210                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
211             },
212             'playlist_mincount': 18,
213         },
214         # RUTV embed
215         {
216             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
217             'info_dict': {
218                 'id': '776940',
219                 'ext': 'mp4',
220                 'title': 'Охотское море стало целиком российским',
221                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
222             },
223             'params': {
224                 # m3u8 download
225                 'skip_download': True,
226             },
227         },
228         # Embedded TED video
229         {
230             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
231             'md5': '65fdff94098e4a607385a60c5177c638',
232             'info_dict': {
233                 'id': '1969',
234                 'ext': 'mp4',
235                 'title': 'Hidden miracles of the natural world',
236                 'uploader': 'Louie Schwartzberg',
237                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
238             }
239         },
240         # Embedded Ustream video
241         {
242             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
243             'md5': '27b99cdb639c9b12a79bca876a073417',
244             'info_dict': {
245                 'id': '45734260',
246                 'ext': 'flv',
247                 'uploader': 'AU SPA:  The NSA and Privacy',
248                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
249             }
250         },
251         # nowvideo embed hidden behind percent encoding
252         {
253             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
254             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
255             'info_dict': {
256                 'id': '06e53103ca9aa',
257                 'ext': 'flv',
258                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
259                 'description': 'No description',
260             },
261         },
262         # arte embed
263         {
264             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
265             'md5': '7653032cbb25bf6c80d80f217055fa43',
266             'info_dict': {
267                 'id': '048195-004_PLUS7-F',
268                 'ext': 'flv',
269                 'title': 'X:enius',
270                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
271                 'upload_date': '20140320',
272             },
273             'params': {
274                 'skip_download': 'Requires rtmpdump'
275             }
276         },
277         # Condé Nast embed
278         {
279             'url': 'http://www.wired.com/2014/04/honda-asimo/',
280             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
281             'info_dict': {
282                 'id': '53501be369702d3275860000',
283                 'ext': 'mp4',
284                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
285             }
286         },
287         # Dailymotion embed
288         {
289             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
290             'md5': '441aeeb82eb72c422c7f14ec533999cd',
291             'info_dict': {
292                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
293                 'ext': 'mp4',
294                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
295                 'uploader': 'Spi0n',
296             },
297             'add_ie': ['Dailymotion'],
298         },
299         # YouTube embed
300         {
301             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
302             'info_dict': {
303                 'id': 'FXRb4ykk4S0',
304                 'ext': 'mp4',
305                 'title': 'The NBL Auction 2014',
306                 'uploader': 'BADMINTON England',
307                 'uploader_id': 'BADMINTONEvents',
308                 'upload_date': '20140603',
309                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
310             },
311             'add_ie': ['Youtube'],
312             'params': {
313                 'skip_download': True,
314             }
315         },
316         # MTVServices embed
317         {
318             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
319             'md5': '35727f82f58c76d996fc188f9755b0d5',
320             'info_dict': {
321                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
322                 'ext': 'mp4',
323                 'title': 'Review',
324                 'description': 'Mario\'s life in the fast lane has never looked so good.',
325             },
326         },
327         # YouTube embed via <data-embed-url="">
328         {
329             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
330             'info_dict': {
331                 'id': '4vAffPZIT44',
332                 'ext': 'mp4',
333                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
334                 'uploader': 'Gameloft',
335                 'uploader_id': 'gameloft',
336                 'upload_date': '20140828',
337                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
338             },
339             'params': {
340                 'skip_download': True,
341             }
342         },
343         # Camtasia studio
344         {
345             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
346             'playlist': [{
347                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
348                 'info_dict': {
349                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
350                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
351                     'ext': 'flv',
352                     'duration': 2235.90,
353                 }
354             }, {
355                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
356                 'info_dict': {
357                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
358                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
359                     'ext': 'flv',
360                     'duration': 2235.93,
361                 }
362             }],
363             'info_dict': {
364                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
365             }
366         },
367         # Flowplayer
368         {
369             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
370             'md5': '9d65602bf31c6e20014319c7d07fba27',
371             'info_dict': {
372                 'id': '5123ea6d5e5a7',
373                 'ext': 'mp4',
374                 'age_limit': 18,
375                 'uploader': 'www.handjobhub.com',
376                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
377             }
378         },
379         # RSS feed
380         {
381             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
382             'info_dict': {
383                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
384                 'title': 'Zero Punctuation',
385                 'description': 're:.*groundbreaking video review series.*'
386             },
387             'playlist_mincount': 11,
388         },
389         # Multiple brightcove videos
390         # https://github.com/rg3/youtube-dl/issues/2283
391         {
392             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
393             'info_dict': {
394                 'id': 'always-never',
395                 'title': 'Always / Never - The New Yorker',
396             },
397             'playlist_count': 3,
398             'params': {
399                 'extract_flat': False,
400                 'skip_download': True,
401             }
402         },
403         # MLB embed
404         {
405             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
406             'md5': '96f09a37e44da40dd083e12d9a683327',
407             'info_dict': {
408                 'id': '33322633',
409                 'ext': 'mp4',
410                 'title': 'Ump changes call to ball',
411                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
412                 'duration': 48,
413                 'timestamp': 1401537900,
414                 'upload_date': '20140531',
415                 'thumbnail': 're:^https?://.*\.jpg$',
416             },
417         },
418         # Wistia embed
419         {
420             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
421             'md5': '8788b683c777a5cf25621eaf286d0c23',
422             'info_dict': {
423                 'id': '1cfaf6b7ea',
424                 'ext': 'mov',
425                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
426                 'duration': 643.0,
427                 'filesize': 182808282,
428                 'uploader': 'education-portal.com',
429             },
430         },
431         {
432             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
433             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
434             'info_dict': {
435                 'id': 'uxjb0lwrcz',
436                 'ext': 'mp4',
437                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
438                 'duration': 1715.0,
439                 'uploader': 'thoughtworks.wistia.com',
440             },
441         },
442         # Direct download with broken HEAD
443         {
444             'url': 'http://ai-radio.org:8000/radio.opus',
445             'info_dict': {
446                 'id': 'radio',
447                 'ext': 'opus',
448                 'title': 'radio',
449             },
450             'params': {
451                 'skip_download': True,  # infinite live stream
452             },
453             'expected_warnings': [
454                 r'501.*Not Implemented'
455             ],
456         },
457         # Soundcloud embed
458         {
459             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
460             'info_dict': {
461                 'id': '174391317',
462                 'ext': 'mp3',
463                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
464                 'uploader': 'Sophos Security',
465                 'title': 'Chet Chat 171 - Oct 29, 2014',
466                 'upload_date': '20141029',
467             }
468         },
469         # Livestream embed
470         {
471             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
472             'info_dict': {
473                 'id': '67864563',
474                 'ext': 'flv',
475                 'upload_date': '20141112',
476                 'title': 'Rosetta #CometLanding webcast HL 10',
477             }
478         },
479         # LazyYT
480         {
481             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
482             'info_dict': {
483                 'id': '1986',
484                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
485             },
486             'playlist_mincount': 2,
487         },
488         # Direct link with incorrect MIME type
489         {
490             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
491             'md5': '4ccbebe5f36706d85221f204d7eb5913',
492             'info_dict': {
493                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
494                 'id': '5_Lennart_Poettering_-_Systemd',
495                 'ext': 'webm',
496                 'title': '5_Lennart_Poettering_-_Systemd',
497                 'upload_date': '20141120',
498             },
499             'expected_warnings': [
500                 'URL could be a direct video link, returning it as such.'
501             ]
502         },
503         # Cinchcast embed
504         {
505             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
506             'info_dict': {
507                 'id': '7141703',
508                 'ext': 'mp3',
509                 'upload_date': '20141126',
510                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
511             }
512         },
513         # Cinerama player
514         {
515             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
516             'info_dict': {
517                 'id': '730m_DandD_1901_512k',
518                 'ext': 'mp4',
519                 'uploader': 'www.abc.net.au',
520                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
521             }
522         },
523         # embedded viddler video
524         {
525             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
526             'info_dict': {
527                 'id': '4d03aad9',
528                 'ext': 'mp4',
529                 'uploader': 'deadspin',
530                 'title': 'WALL-TO-GORTAT',
531                 'timestamp': 1422285291,
532                 'upload_date': '20150126',
533             },
534             'add_ie': ['Viddler'],
535         },
536         # Libsyn embed
537         {
538             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
539             'info_dict': {
540                 'id': '3377616',
541                 'ext': 'mp3',
542                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
543                 'description': 'md5:601cb790edd05908957dae8aaa866465',
544                 'upload_date': '20150220',
545             },
546         },
547         # jwplayer YouTube
548         {
549             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
550             'info_dict': {
551                 'id': 'Mrj4DVp2zeA',
552                 'ext': 'mp4',
553                 'upload_date': '20150212',
554                 'uploader': 'The National Archives UK',
555                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
556                 'uploader_id': 'NationalArchives08',
557                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
558             },
559         },
560         # rtl.nl embed
561         {
562             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
563             'playlist_mincount': 5,
564             'info_dict': {
565                 'id': 'aanslagen-kopenhagen',
566                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
567             }
568         },
569         # Zapiks embed
570         {
571             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
572             'info_dict': {
573                 'id': '118046',
574                 'ext': 'mp4',
575                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
576             }
577         },
578         # Kaltura embed
579         {
580             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
581             'info_dict': {
582                 'id': '1_eergr3h1',
583                 'ext': 'mp4',
584                 'upload_date': '20150226',
585                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
586                 'timestamp': int,
587                 'title': 'John Carlson Postgame 2/25/15',
588             },
589         },
590         # Eagle.Platform embed (generic URL)
591         {
592             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
593             'info_dict': {
594                 'id': '227304',
595                 'ext': 'mp4',
596                 'title': 'Навальный вышел на свободу',
597                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
598                 'thumbnail': 're:^https?://.*\.jpg$',
599                 'duration': 87,
600                 'view_count': int,
601                 'age_limit': 0,
602             },
603         },
604         # ClipYou (Eagle.Platform) embed (custom URL)
605         {
606             'url': 'http://muz-tv.ru/play/7129/',
607             'info_dict': {
608                 'id': '12820',
609                 'ext': 'mp4',
610                 'title': "'O Sole Mio",
611                 'thumbnail': 're:^https?://.*\.jpg$',
612                 'duration': 216,
613                 'view_count': int,
614             },
615         },
616         # Pladform embed
617         {
618             'url': 'http://muz-tv.ru/kinozal/view/7400/',
619             'info_dict': {
620                 'id': '100183293',
621                 'ext': 'mp4',
622                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
623                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
624                 'thumbnail': 're:^https?://.*\.jpg$',
625                 'duration': 694,
626                 'age_limit': 0,
627             },
628         },
629         # Playwire embed
630         {
631             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
632             'info_dict': {
633                 'id': '3519514',
634                 'ext': 'mp4',
635                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
636                 'thumbnail': 're:^https?://.*\.png$',
637                 'duration': 45.115,
638             },
639         },
640         # 5min embed
641         {
642             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
643             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
644             'info_dict': {
645                 'id': '518726732',
646                 'ext': 'mp4',
647                 'title': 'Facebook Creates "On This Day" | Crunch Report',
648             },
649         },
650         # SVT embed
651         {
652             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
653             'info_dict': {
654                 'id': '2900353',
655                 'ext': 'flv',
656                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
657                 'duration': 27,
658                 'age_limit': 0,
659             },
660         },
661         # RSS feed with enclosure
662         {
663             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
664             'info_dict': {
665                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
666                 'ext': 'm4v',
667                 'upload_date': '20150228',
668                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
669             }
670         },
671         # Crooks and Liars embed
672         {
673             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
674             'info_dict': {
675                 'id': '8RUoRhRi',
676                 'ext': 'mp4',
677                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
678                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
679                 'timestamp': 1428207000,
680                 'upload_date': '20150405',
681                 'uploader': 'Heather',
682             },
683         },
684         # Crooks and Liars external embed
685         {
686             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
687             'info_dict': {
688                 'id': 'MTE3MjUtMzQ2MzA',
689                 'ext': 'mp4',
690                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
691                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
692                 'timestamp': 1265032391,
693                 'upload_date': '20100201',
694                 'uploader': 'Heather',
695             },
696         },
697         # NBC Sports vplayer embed
698         {
699             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
700             'info_dict': {
701                 'id': 'ln7x1qSThw4k',
702                 'ext': 'flv',
703                 'title': "PFT Live: New leader in the 'new-look' defense",
704                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
705             },
706         },
707         # UDN embed
708         {
709             'url': 'http://www.udn.com/news/story/7314/822787',
710             'md5': 'fd2060e988c326991037b9aff9df21a6',
711             'info_dict': {
712                 'id': '300346',
713                 'ext': 'mp4',
714                 'title': '中一中男師變性 全校師生力挺',
715                 'thumbnail': 're:^https?://.*\.jpg$',
716             }
717         },
718         # Ooyala embed
719         {
720             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
721             'info_dict': {
722                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
723                 'ext': 'mp4',
724                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
725                 'title': 'This is what separates the Excel masters from the wannabes',
726             },
727             'params': {
728                 # m3u8 downloads
729                 'skip_download': True,
730             }
731         },
732         # Contains a SMIL manifest
733         {
734             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
735             'info_dict': {
736                 'id': 'file',
737                 'ext': 'flv',
738                 'title': '+ Football: Lottery Champions League Europe',
739                 'uploader': 'www.telewebion.com',
740             },
741             'params': {
742                 # rtmpe downloads
743                 'skip_download': True,
744             }
745         }
746     ]
747
748     def report_following_redirect(self, new_url):
749         """Report information extraction."""
750         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
751
752     def _extract_rss(self, url, video_id, doc):
753         playlist_title = doc.find('./channel/title').text
754         playlist_desc_el = doc.find('./channel/description')
755         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
756
757         entries = []
758         for it in doc.findall('./channel/item'):
759             next_url = xpath_text(it, 'link', fatal=False)
760             if not next_url:
761                 enclosure_nodes = it.findall('./enclosure')
762                 for e in enclosure_nodes:
763                     next_url = e.attrib.get('url')
764                     if next_url:
765                         break
766
767             if not next_url:
768                 continue
769
770             entries.append({
771                 '_type': 'url',
772                 'url': next_url,
773                 'title': it.find('title').text,
774             })
775
776         return {
777             '_type': 'playlist',
778             'id': url,
779             'title': playlist_title,
780             'description': playlist_desc,
781             'entries': entries,
782         }
783
784     def _extract_camtasia(self, url, video_id, webpage):
785         """ Returns None if no camtasia video can be found. """
786
787         camtasia_cfg = self._search_regex(
788             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
789             webpage, 'camtasia configuration file', default=None)
790         if camtasia_cfg is None:
791             return None
792
793         title = self._html_search_meta('DC.title', webpage, fatal=True)
794
795         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
796         camtasia_cfg = self._download_xml(
797             camtasia_url, video_id,
798             note='Downloading camtasia configuration',
799             errnote='Failed to download camtasia configuration')
800         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
801
802         entries = []
803         for n in fileset_node.getchildren():
804             url_n = n.find('./uri')
805             if url_n is None:
806                 continue
807
808             entries.append({
809                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
810                 'title': '%s - %s' % (title, n.tag),
811                 'url': compat_urlparse.urljoin(url, url_n.text),
812                 'duration': float_or_none(n.find('./duration').text),
813             })
814
815         return {
816             '_type': 'playlist',
817             'entries': entries,
818             'title': title,
819         }
820
821     def _real_extract(self, url):
822         if url.startswith('//'):
823             return {
824                 '_type': 'url',
825                 'url': self.http_scheme() + url,
826             }
827
828         parsed_url = compat_urlparse.urlparse(url)
829         if not parsed_url.scheme:
830             default_search = self._downloader.params.get('default_search')
831             if default_search is None:
832                 default_search = 'fixup_error'
833
834             if default_search in ('auto', 'auto_warning', 'fixup_error'):
835                 if '/' in url:
836                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
837                     return self.url_result('http://' + url)
838                 elif default_search != 'fixup_error':
839                     if default_search == 'auto_warning':
840                         if re.match(r'^(?:url|URL)$', url):
841                             raise ExtractorError(
842                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
843                                 expected=True)
844                         else:
845                             self._downloader.report_warning(
846                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
847                     return self.url_result('ytsearch:' + url)
848
849             if default_search in ('error', 'fixup_error'):
850                 raise ExtractorError(
851                     '%r is not a valid URL. '
852                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
853                     % (url, url), expected=True)
854             else:
855                 if ':' not in default_search:
856                     default_search += ':'
857                 return self.url_result(default_search + url)
858
859         url, smuggled_data = unsmuggle_url(url)
860         force_videoid = None
861         is_intentional = smuggled_data and smuggled_data.get('to_generic')
862         if smuggled_data and 'force_videoid' in smuggled_data:
863             force_videoid = smuggled_data['force_videoid']
864             video_id = force_videoid
865         else:
866             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
867
868         self.to_screen('%s: Requesting header' % video_id)
869
870         head_req = HEADRequest(url)
871         head_response = self._request_webpage(
872             head_req, video_id,
873             note=False, errnote='Could not send HEAD request to %s' % url,
874             fatal=False)
875
876         if head_response is not False:
877             # Check for redirect
878             new_url = head_response.geturl()
879             if url != new_url:
880                 self.report_following_redirect(new_url)
881                 if force_videoid:
882                     new_url = smuggle_url(
883                         new_url, {'force_videoid': force_videoid})
884                 return self.url_result(new_url)
885
886         full_response = None
887         if head_response is False:
888             full_response = self._request_webpage(url, video_id)
889             head_response = full_response
890
891         # Check for direct link to a video
892         content_type = head_response.headers.get('Content-Type', '')
893         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
894         if m:
895             upload_date = unified_strdate(
896                 head_response.headers.get('Last-Modified'))
897             return {
898                 'id': video_id,
899                 'title': os.path.splitext(url_basename(url))[0],
900                 'direct': True,
901                 'formats': [{
902                     'format_id': m.group('format_id'),
903                     'url': url,
904                     'vcodec': 'none' if m.group('type') == 'audio' else None
905                 }],
906                 'upload_date': upload_date,
907             }
908
909         if not self._downloader.params.get('test', False) and not is_intentional:
910             self._downloader.report_warning('Falling back on generic information extractor.')
911
912         if not full_response:
913             full_response = self._request_webpage(url, video_id)
914
915         # Maybe it's a direct link to a video?
916         # Be careful not to download the whole thing!
917         first_bytes = full_response.read(512)
918         if not is_html(first_bytes):
919             self._downloader.report_warning(
920                 'URL could be a direct video link, returning it as such.')
921             upload_date = unified_strdate(
922                 head_response.headers.get('Last-Modified'))
923             return {
924                 'id': video_id,
925                 'title': os.path.splitext(url_basename(url))[0],
926                 'direct': True,
927                 'url': url,
928                 'upload_date': upload_date,
929             }
930
931         webpage = self._webpage_read_content(
932             full_response, url, video_id, prefix=first_bytes)
933
934         self.report_extraction(video_id)
935
936         # Is it an RSS feed?
937         try:
938             doc = parse_xml(webpage)
939             if doc.tag == 'rss':
940                 return self._extract_rss(url, video_id, doc)
941         except compat_xml_parse_error:
942             pass
943
944         # Is it a Camtasia project?
945         camtasia_res = self._extract_camtasia(url, video_id, webpage)
946         if camtasia_res is not None:
947             return camtasia_res
948
949         # Sometimes embedded video player is hidden behind percent encoding
950         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
951         # Unescaping the whole page allows to handle those cases in a generic way
952         webpage = compat_urllib_parse.unquote(webpage)
953
954         # it's tempting to parse this further, but you would
955         # have to take into account all the variations like
956         #   Video Title - Site Name
957         #   Site Name | Video Title
958         #   Video Title - Tagline | Site Name
959         # and so on and so forth; it's just not practical
960         video_title = self._html_search_regex(
961             r'(?s)<title>(.*?)</title>', webpage, 'video title',
962             default='video')
963
964         # Try to detect age limit automatically
965         age_limit = self._rta_search(webpage)
966         # And then there are the jokers who advertise that they use RTA,
967         # but actually don't.
968         AGE_LIMIT_MARKERS = [
969             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
970         ]
971         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
972             age_limit = 18
973
974         # video uploader is domain name
975         video_uploader = self._search_regex(
976             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
977
978         # Helper method
979         def _playlist_from_matches(matches, getter=None, ie=None):
980             urlrs = orderedSet(
981                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
982                 for m in matches)
983             return self.playlist_result(
984                 urlrs, playlist_id=video_id, playlist_title=video_title)
985
986         # Look for BrightCove:
987         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
988         if bc_urls:
989             self.to_screen('Brightcove video detected.')
990             entries = [{
991                 '_type': 'url',
992                 'url': smuggle_url(bc_url, {'Referer': url}),
993                 'ie_key': 'Brightcove'
994             } for bc_url in bc_urls]
995
996             return {
997                 '_type': 'playlist',
998                 'title': video_title,
999                 'id': video_id,
1000                 'entries': entries,
1001             }
1002
1003         # Look for embedded rtl.nl player
1004         matches = re.findall(
1005             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1006             webpage)
1007         if matches:
1008             return _playlist_from_matches(matches, ie='RtlNl')
1009
1010         # Look for embedded (iframe) Vimeo player
1011         mobj = re.search(
1012             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1013         if mobj:
1014             player_url = unescapeHTML(mobj.group('url'))
1015             surl = smuggle_url(player_url, {'Referer': url})
1016             return self.url_result(surl)
1017         # Look for embedded (swf embed) Vimeo player
1018         mobj = re.search(
1019             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1020         if mobj:
1021             return self.url_result(mobj.group(1))
1022
1023         # Look for embedded YouTube player
1024         matches = re.findall(r'''(?x)
1025             (?:
1026                 <iframe[^>]+?src=|
1027                 data-video-url=|
1028                 <embed[^>]+?src=|
1029                 embedSWF\(?:\s*|
1030                 new\s+SWFObject\(
1031             )
1032             (["\'])
1033                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1034                 (?:embed|v|p)/.+?)
1035             \1''', webpage)
1036         if matches:
1037             return _playlist_from_matches(
1038                 matches, lambda m: unescapeHTML(m[1]))
1039
1040         # Look for lazyYT YouTube embed
1041         matches = re.findall(
1042             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1043         if matches:
1044             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1045
1046         # Look for embedded Dailymotion player
1047         matches = re.findall(
1048             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1049         if matches:
1050             return _playlist_from_matches(
1051                 matches, lambda m: unescapeHTML(m[1]))
1052
1053         # Look for embedded Dailymotion playlist player (#3822)
1054         m = re.search(
1055             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1056         if m:
1057             playlists = re.findall(
1058                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1059             if playlists:
1060                 return _playlist_from_matches(
1061                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1062
1063         # Look for embedded Wistia player
1064         match = re.search(
1065             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1066         if match:
1067             embed_url = self._proto_relative_url(
1068                 unescapeHTML(match.group('url')))
1069             return {
1070                 '_type': 'url_transparent',
1071                 'url': embed_url,
1072                 'ie_key': 'Wistia',
1073                 'uploader': video_uploader,
1074                 'title': video_title,
1075                 'id': video_id,
1076             }
1077
1078         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1079         if match:
1080             return {
1081                 '_type': 'url_transparent',
1082                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1083                 'ie_key': 'Wistia',
1084                 'uploader': video_uploader,
1085                 'title': video_title,
1086                 'id': match.group('id')
1087             }
1088
1089         # Look for embedded blip.tv player
1090         bliptv_url = BlipTVIE._extract_url(webpage)
1091         if bliptv_url:
1092             return self.url_result(bliptv_url, 'BlipTV')
1093
1094         # Look for SVT player
1095         svt_url = SVTIE._extract_url(webpage)
1096         if svt_url:
1097             return self.url_result(svt_url, 'SVT')
1098
1099         # Look for embedded condenast player
1100         matches = re.findall(
1101             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1102             webpage)
1103         if matches:
1104             return {
1105                 '_type': 'playlist',
1106                 'entries': [{
1107                     '_type': 'url',
1108                     'ie_key': 'CondeNast',
1109                     'url': ma,
1110                 } for ma in matches],
1111                 'title': video_title,
1112                 'id': video_id,
1113             }
1114
1115         # Look for Bandcamp pages with custom domain
1116         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1117         if mobj is not None:
1118             burl = unescapeHTML(mobj.group(1))
1119             # Don't set the extractor because it can be a track url or an album
1120             return self.url_result(burl)
1121
1122         # Look for embedded Vevo player
1123         mobj = re.search(
1124             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1125         if mobj is not None:
1126             return self.url_result(mobj.group('url'))
1127
1128         # Look for embedded Viddler player
1129         mobj = re.search(
1130             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1131             webpage)
1132         if mobj is not None:
1133             return self.url_result(mobj.group('url'))
1134
1135         # Look for NYTimes player
1136         mobj = re.search(
1137             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1138             webpage)
1139         if mobj is not None:
1140             return self.url_result(mobj.group('url'))
1141
1142         # Look for Libsyn player
1143         mobj = re.search(
1144             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1145         if mobj is not None:
1146             return self.url_result(mobj.group('url'))
1147
1148         # Look for Ooyala videos
1149         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1150                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1151                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1152                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1153         if mobj is not None:
1154             return OoyalaIE._build_url_result(mobj.group('ec'))
1155
1156         # Look for multiple Ooyala embeds on SBN network websites
1157         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1158         if mobj is not None:
1159             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1160             if embeds:
1161                 return _playlist_from_matches(
1162                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1163
1164         # Look for Aparat videos
1165         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1166         if mobj is not None:
1167             return self.url_result(mobj.group(1), 'Aparat')
1168
1169         # Look for MPORA videos
1170         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1171         if mobj is not None:
1172             return self.url_result(mobj.group(1), 'Mpora')
1173
1174         # Look for embedded NovaMov-based player
1175         mobj = re.search(
1176             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1177                     (?P<url>http://(?:(?:embed|www)\.)?
1178                         (?:novamov\.com|
1179                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1180                            videoweed\.(?:es|com)|
1181                            movshare\.(?:net|sx|ag)|
1182                            divxstage\.(?:eu|net|ch|co|at|ag))
1183                         /embed\.php.+?)\1''', webpage)
1184         if mobj is not None:
1185             return self.url_result(mobj.group('url'))
1186
1187         # Look for embedded Facebook player
1188         mobj = re.search(
1189             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1190         if mobj is not None:
1191             return self.url_result(mobj.group('url'), 'Facebook')
1192
1193         # Look for embedded VK player
1194         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1195         if mobj is not None:
1196             return self.url_result(mobj.group('url'), 'VK')
1197
1198         # Look for embedded ivi player
1199         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1200         if mobj is not None:
1201             return self.url_result(mobj.group('url'), 'Ivi')
1202
1203         # Look for embedded Huffington Post player
1204         mobj = re.search(
1205             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1206         if mobj is not None:
1207             return self.url_result(mobj.group('url'), 'HuffPost')
1208
1209         # Look for embed.ly
1210         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1211         if mobj is not None:
1212             return self.url_result(mobj.group('url'))
1213         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1214         if mobj is not None:
1215             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1216
1217         # Look for funnyordie embed
1218         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1219         if matches:
1220             return _playlist_from_matches(
1221                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1222
1223         # Look for BBC iPlayer embed
1224         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1225         if matches:
1226             return _playlist_from_matches(matches, ie='BBCCoUk')
1227
1228         # Look for embedded RUTV player
1229         rutv_url = RUTVIE._extract_url(webpage)
1230         if rutv_url:
1231             return self.url_result(rutv_url, 'RUTV')
1232
1233         # Look for embedded SportBox player
1234         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1235         if sportbox_urls:
1236             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1237
1238         # Look for embedded TED player
1239         mobj = re.search(
1240             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1241         if mobj is not None:
1242             return self.url_result(mobj.group('url'), 'TED')
1243
1244         # Look for embedded Ustream videos
1245         mobj = re.search(
1246             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1247         if mobj is not None:
1248             return self.url_result(mobj.group('url'), 'Ustream')
1249
1250         # Look for embedded arte.tv player
1251         mobj = re.search(
1252             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1253             webpage)
1254         if mobj is not None:
1255             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1256
1257         # Look for embedded smotri.com player
1258         smotri_url = SmotriIE._extract_url(webpage)
1259         if smotri_url:
1260             return self.url_result(smotri_url, 'Smotri')
1261
1262         # Look for embeded soundcloud player
1263         mobj = re.search(
1264             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1265             webpage)
1266         if mobj is not None:
1267             url = unescapeHTML(mobj.group('url'))
1268             return self.url_result(url)
1269
1270         # Look for embedded vulture.com player
1271         mobj = re.search(
1272             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1273             webpage)
1274         if mobj is not None:
1275             url = unescapeHTML(mobj.group('url'))
1276             return self.url_result(url, ie='Vulture')
1277
1278         # Look for embedded mtvservices player
1279         mobj = re.search(
1280             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1281             webpage)
1282         if mobj is not None:
1283             url = unescapeHTML(mobj.group('url'))
1284             return self.url_result(url, ie='MTVServicesEmbedded')
1285
1286         # Look for embedded yahoo player
1287         mobj = re.search(
1288             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1289             webpage)
1290         if mobj is not None:
1291             return self.url_result(mobj.group('url'), 'Yahoo')
1292
1293         # Look for embedded sbs.com.au player
1294         mobj = re.search(
1295             r'''(?x)
1296             (?:
1297                 <meta\s+property="og:video"\s+content=|
1298                 <iframe[^>]+?src=
1299             )
1300             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1301             webpage)
1302         if mobj is not None:
1303             return self.url_result(mobj.group('url'), 'SBS')
1304
1305         # Look for embedded Cinchcast player
1306         mobj = re.search(
1307             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1308             webpage)
1309         if mobj is not None:
1310             return self.url_result(mobj.group('url'), 'Cinchcast')
1311
1312         mobj = re.search(
1313             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1314             webpage)
1315         if not mobj:
1316             mobj = re.search(
1317                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1318                 webpage)
1319         if mobj is not None:
1320             return self.url_result(mobj.group('url'), 'MLB')
1321
1322         mobj = re.search(
1323             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1324             webpage)
1325         if mobj is not None:
1326             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1327
1328         mobj = re.search(
1329             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1330             webpage)
1331         if mobj is not None:
1332             return self.url_result(mobj.group('url'), 'Livestream')
1333
1334         # Look for Zapiks embed
1335         mobj = re.search(
1336             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1337         if mobj is not None:
1338             return self.url_result(mobj.group('url'), 'Zapiks')
1339
1340         # Look for Kaltura embeds
1341         mobj = re.search(
1342             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1343         if mobj is not None:
1344             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1345
1346         # Look for Eagle.Platform embeds
1347         mobj = re.search(
1348             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1349         if mobj is not None:
1350             return self.url_result(mobj.group('url'), 'EaglePlatform')
1351
1352         # Look for ClipYou (uses Eagle.Platform) embeds
1353         mobj = re.search(
1354             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1355         if mobj is not None:
1356             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1357
1358         # Look for Pladform embeds
1359         mobj = re.search(
1360             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1361         if mobj is not None:
1362             return self.url_result(mobj.group('url'), 'Pladform')
1363
1364         # Look for Playwire embeds
1365         mobj = re.search(
1366             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1367         if mobj is not None:
1368             return self.url_result(mobj.group('url'))
1369
1370         # Look for 5min embeds
1371         mobj = re.search(
1372             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1373         if mobj is not None:
1374             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1375
1376         # Look for Crooks and Liars embeds
1377         mobj = re.search(
1378             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1379         if mobj is not None:
1380             return self.url_result(mobj.group('url'))
1381
1382         # Look for NBC Sports VPlayer embeds
1383         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1384         if nbc_sports_url:
1385             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1386
1387         # Look for UDN embeds
1388         mobj = re.search(
1389             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1390         if mobj is not None:
1391             return self.url_result(
1392                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1393
1394         # Look for Senate ISVP iframe
1395         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1396         if senate_isvp_url:
1397             return self.url_result(surl, 'SenateISVP')
1398
1399         def check_video(vurl):
1400             if YoutubeIE.suitable(vurl):
1401                 return True
1402             vpath = compat_urlparse.urlparse(vurl).path
1403             vext = determine_ext(vpath)
1404             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1405
1406         def filter_video(urls):
1407             return list(filter(check_video, urls))
1408
1409         # Start with something easy: JW Player in SWFObject
1410         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1411         if not found:
1412             # Look for gorilla-vid style embedding
1413             found = filter_video(re.findall(r'''(?sx)
1414                 (?:
1415                     jw_plugins|
1416                     JWPlayerOptions|
1417                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1418                 )
1419                 .*?
1420                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1421         if not found:
1422             # Broaden the search a little bit
1423             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1424         if not found:
1425             # Broaden the findall a little bit: JWPlayer JS loader
1426             found = filter_video(re.findall(
1427                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1428         if not found:
1429             # Flow player
1430             found = filter_video(re.findall(r'''(?xs)
1431                 flowplayer\("[^"]+",\s*
1432                     \{[^}]+?\}\s*,
1433                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1434                         ["']?url["']?\s*:\s*["']([^"']+)["']
1435             ''', webpage))
1436         if not found:
1437             # Cinerama player
1438             found = re.findall(
1439                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1440         if not found:
1441             # Try to find twitter cards info
1442             found = filter_video(re.findall(
1443                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1444         if not found:
1445             # We look for Open Graph info:
1446             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1447             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1448             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1449             if m_video_type is not None:
1450                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1451         if not found:
1452             # HTML5 video
1453             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1454         if not found:
1455             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1456             found = re.search(
1457                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1458                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1459                 webpage)
1460             if not found:
1461                 # Look also in Refresh HTTP header
1462                 refresh_header = head_response.headers.get('Refresh')
1463                 if refresh_header:
1464                     found = re.search(REDIRECT_REGEX, refresh_header)
1465             if found:
1466                 new_url = compat_urlparse.urljoin(url, found.group(1))
1467                 self.report_following_redirect(new_url)
1468                 return {
1469                     '_type': 'url',
1470                     'url': new_url,
1471                 }
1472         if not found:
1473             raise UnsupportedError(url)
1474
1475         entries = []
1476         for video_url in found:
1477             video_url = compat_urlparse.urljoin(url, video_url)
1478             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1479
1480             # Sometimes, jwplayer extraction will result in a YouTube URL
1481             if YoutubeIE.suitable(video_url):
1482                 entries.append(self.url_result(video_url, 'Youtube'))
1483                 continue
1484
1485             # Drop the file extension so that only the base name is used as the video id
1486             video_id = os.path.splitext(video_id)[0]
1487
1488             if determine_ext(video_url) == 'smil':
1489                 entries.append({
1490                     'id': video_id,
1491                     'formats': self._extract_smil_formats(video_url, video_id),
1492                     'uploader': video_uploader,
1493                     'title': video_title,
1494                     'age_limit': age_limit,
1495                 })
1496             else:
1497                 entries.append({
1498                     'id': video_id,
1499                     'url': video_url,
1500                     'uploader': video_uploader,
1501                     'title': video_title,
1502                     'age_limit': age_limit,
1503                 })
1504
1505         if len(entries) == 1:
1506             return entries[0]
1507         else:
1508             for num, e in enumerate(entries, start=1):
1509                 # 'url' results don't have a title
1510                 if e.get('title') is not None:
1511                     e['title'] = '%s (%d)' % (e['title'], num)
1512             return {
1513                 '_type': 'playlist',
1514                 'entries': entries,
1515             }