Merge pull request #5376 from PeteHemery/ffmpeg-postproc-utime-bug
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urlparse,
13     compat_xml_parse_error,
14 )
15 from ..utils import (
16     determine_ext,
17     ExtractorError,
18     float_or_none,
19     HEADRequest,
20     is_html,
21     orderedSet,
22     parse_xml,
23     smuggle_url,
24     unescapeHTML,
25     unified_strdate,
26     unsmuggle_url,
27     UnsupportedError,
28     url_basename,
29     xpath_text,
30 )
31 from .brightcove import BrightcoveIE
32 from .nbc import NBCSportsVPlayerIE
33 from .ooyala import OoyalaIE
34 from .rutv import RUTVIE
35 from .smotri import SmotriIE
36 from .condenast import CondeNastIE
37 from .udn import UDNEmbedIE
38
39
40 class GenericIE(InfoExtractor):
41     IE_DESC = 'Generic downloader that works on some sites'
42     _VALID_URL = r'.*'
43     IE_NAME = 'generic'
44     _TESTS = [
45         {
46             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
47             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
48             'info_dict': {
49                 'id': '13601338388002',
50                 'ext': 'mp4',
51                 'uploader': 'www.hodiho.fr',
52                 'title': 'R\u00e9gis plante sa Jeep',
53             }
54         },
55         # bandcamp page with custom domain
56         {
57             'add_ie': ['Bandcamp'],
58             'url': 'http://bronyrock.com/track/the-pony-mash',
59             'info_dict': {
60                 'id': '3235767654',
61                 'ext': 'mp3',
62                 'title': 'The Pony Mash',
63                 'uploader': 'M_Pallante',
64             },
65             'skip': 'There is a limit of 200 free downloads / month for the test song',
66         },
67         # embedded brightcove video
68         # it also tests brightcove videos that need to set the 'Referer' in the
69         # http requests
70         {
71             'add_ie': ['Brightcove'],
72             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
73             'info_dict': {
74                 'id': '2765128793001',
75                 'ext': 'mp4',
76                 'title': 'Le cours de bourse : l’analyse technique',
77                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
78                 'uploader': 'BFM BUSINESS',
79             },
80             'params': {
81                 'skip_download': True,
82             },
83         },
84         {
85             # https://github.com/rg3/youtube-dl/issues/2253
86             'url': 'http://bcove.me/i6nfkrc3',
87             'md5': '0ba9446db037002366bab3b3eb30c88c',
88             'info_dict': {
89                 'id': '3101154703001',
90                 'ext': 'mp4',
91                 'title': 'Still no power',
92                 'uploader': 'thestar.com',
93                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
94             },
95             'add_ie': ['Brightcove'],
96         },
97         {
98             'url': 'http://www.championat.com/video/football/v/87/87499.html',
99             'md5': 'fb973ecf6e4a78a67453647444222983',
100             'info_dict': {
101                 'id': '3414141473001',
102                 'ext': 'mp4',
103                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
104                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
105                 'uploader': 'Championat',
106             },
107         },
108         {
109             # https://github.com/rg3/youtube-dl/issues/3541
110             'add_ie': ['Brightcove'],
111             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
112             'info_dict': {
113                 'id': '3866516442001',
114                 'ext': 'mp4',
115                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
116                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
117                 'uploader': 'SBS Broadcasting',
118             },
119             'skip': 'Restricted to Netherlands',
120             'params': {
121                 'skip_download': True,  # m3u8 download
122             },
123         },
124         # Direct link to a video
125         {
126             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
127             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
128             'info_dict': {
129                 'id': 'trailer',
130                 'ext': 'mp4',
131                 'title': 'trailer',
132                 'upload_date': '20100513',
133             }
134         },
135         # ooyala video
136         {
137             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
138             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
139             'info_dict': {
140                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
141                 'ext': 'mp4',
142                 'title': '2cc213299525360.mov',  # that's what we get
143             },
144             'add_ie': ['Ooyala'],
145         },
146         # multiple ooyala embeds on SBN network websites
147         {
148             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
149             'info_dict': {
150                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
151                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
152             },
153             'playlist_mincount': 3,
154             'params': {
155                 'skip_download': True,
156             },
157             'add_ie': ['Ooyala'],
158         },
159         # google redirect
160         {
161             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
162             'info_dict': {
163                 'id': 'cmQHVoWB5FY',
164                 'ext': 'mp4',
165                 'upload_date': '20130224',
166                 'uploader_id': 'TheVerge',
167                 'description': 're:^Chris Ziegler takes a look at the\.*',
168                 'uploader': 'The Verge',
169                 'title': 'First Firefox OS phones side-by-side',
170             },
171             'params': {
172                 'skip_download': False,
173             }
174         },
175         # embed.ly video
176         {
177             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
178             'info_dict': {
179                 'id': '9ODmcdjQcHQ',
180                 'ext': 'mp4',
181                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
182                 'upload_date': '20140225',
183                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
184                 'uploader': 'Tested',
185                 'uploader_id': 'testedcom',
186             },
187             # No need to test YoutubeIE here
188             'params': {
189                 'skip_download': True,
190             },
191         },
192         # funnyordie embed
193         {
194             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
195             'info_dict': {
196                 'id': '18e820ec3f',
197                 'ext': 'mp4',
198                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
199                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
200             },
201         },
202         # BBC iPlayer embeds
203         {
204             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
205             'info_dict': {
206                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
207             },
208             'playlist_mincount': 18,
209         },
210         # RUTV embed
211         {
212             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
213             'info_dict': {
214                 'id': '776940',
215                 'ext': 'mp4',
216                 'title': 'Охотское море стало целиком российским',
217                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
218             },
219             'params': {
220                 # m3u8 download
221                 'skip_download': True,
222             },
223         },
224         # Embedded TED video
225         {
226             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
227             'md5': '65fdff94098e4a607385a60c5177c638',
228             'info_dict': {
229                 'id': '1969',
230                 'ext': 'mp4',
231                 'title': 'Hidden miracles of the natural world',
232                 'uploader': 'Louie Schwartzberg',
233                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
234             }
235         },
236         # Embedded Ustream video
237         {
238             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
239             'md5': '27b99cdb639c9b12a79bca876a073417',
240             'info_dict': {
241                 'id': '45734260',
242                 'ext': 'flv',
243                 'uploader': 'AU SPA:  The NSA and Privacy',
244                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
245             }
246         },
247         # nowvideo embed hidden behind percent encoding
248         {
249             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
250             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
251             'info_dict': {
252                 'id': '06e53103ca9aa',
253                 'ext': 'flv',
254                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
255                 'description': 'No description',
256             },
257         },
258         # arte embed
259         {
260             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
261             'md5': '7653032cbb25bf6c80d80f217055fa43',
262             'info_dict': {
263                 'id': '048195-004_PLUS7-F',
264                 'ext': 'flv',
265                 'title': 'X:enius',
266                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
267                 'upload_date': '20140320',
268             },
269             'params': {
270                 'skip_download': 'Requires rtmpdump'
271             }
272         },
273         # Condé Nast embed
274         {
275             'url': 'http://www.wired.com/2014/04/honda-asimo/',
276             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
277             'info_dict': {
278                 'id': '53501be369702d3275860000',
279                 'ext': 'mp4',
280                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
281             }
282         },
283         # Dailymotion embed
284         {
285             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
286             'md5': '441aeeb82eb72c422c7f14ec533999cd',
287             'info_dict': {
288                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
289                 'ext': 'mp4',
290                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
291                 'uploader': 'Spi0n',
292             },
293             'add_ie': ['Dailymotion'],
294         },
295         # YouTube embed
296         {
297             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
298             'info_dict': {
299                 'id': 'FXRb4ykk4S0',
300                 'ext': 'mp4',
301                 'title': 'The NBL Auction 2014',
302                 'uploader': 'BADMINTON England',
303                 'uploader_id': 'BADMINTONEvents',
304                 'upload_date': '20140603',
305                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
306             },
307             'add_ie': ['Youtube'],
308             'params': {
309                 'skip_download': True,
310             }
311         },
312         # MTVServices embed
313         {
314             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
315             'md5': '35727f82f58c76d996fc188f9755b0d5',
316             'info_dict': {
317                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
318                 'ext': 'mp4',
319                 'title': 'Review',
320                 'description': 'Mario\'s life in the fast lane has never looked so good.',
321             },
322         },
323         # YouTube embed via <data-embed-url="">
324         {
325             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
326             'info_dict': {
327                 'id': '4vAffPZIT44',
328                 'ext': 'mp4',
329                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
330                 'uploader': 'Gameloft',
331                 'uploader_id': 'gameloft',
332                 'upload_date': '20140828',
333                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
334             },
335             'params': {
336                 'skip_download': True,
337             }
338         },
339         # Camtasia studio
340         {
341             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
342             'playlist': [{
343                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
344                 'info_dict': {
345                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
346                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
347                     'ext': 'flv',
348                     'duration': 2235.90,
349                 }
350             }, {
351                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
352                 'info_dict': {
353                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
354                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
355                     'ext': 'flv',
356                     'duration': 2235.93,
357                 }
358             }],
359             'info_dict': {
360                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
361             }
362         },
363         # Flowplayer
364         {
365             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
366             'md5': '9d65602bf31c6e20014319c7d07fba27',
367             'info_dict': {
368                 'id': '5123ea6d5e5a7',
369                 'ext': 'mp4',
370                 'age_limit': 18,
371                 'uploader': 'www.handjobhub.com',
372                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
373             }
374         },
375         # RSS feed
376         {
377             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
378             'info_dict': {
379                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
380                 'title': 'Zero Punctuation',
381                 'description': 're:.*groundbreaking video review series.*'
382             },
383             'playlist_mincount': 11,
384         },
385         # Multiple brightcove videos
386         # https://github.com/rg3/youtube-dl/issues/2283
387         {
388             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
389             'info_dict': {
390                 'id': 'always-never',
391                 'title': 'Always / Never - The New Yorker',
392             },
393             'playlist_count': 3,
394             'params': {
395                 'extract_flat': False,
396                 'skip_download': True,
397             }
398         },
399         # MLB embed
400         {
401             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
402             'md5': '96f09a37e44da40dd083e12d9a683327',
403             'info_dict': {
404                 'id': '33322633',
405                 'ext': 'mp4',
406                 'title': 'Ump changes call to ball',
407                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
408                 'duration': 48,
409                 'timestamp': 1401537900,
410                 'upload_date': '20140531',
411                 'thumbnail': 're:^https?://.*\.jpg$',
412             },
413         },
414         # Wistia embed
415         {
416             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
417             'md5': '8788b683c777a5cf25621eaf286d0c23',
418             'info_dict': {
419                 'id': '1cfaf6b7ea',
420                 'ext': 'mov',
421                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
422                 'duration': 643.0,
423                 'filesize': 182808282,
424                 'uploader': 'education-portal.com',
425             },
426         },
427         {
428             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
429             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
430             'info_dict': {
431                 'id': 'uxjb0lwrcz',
432                 'ext': 'mp4',
433                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
434                 'duration': 1715.0,
435                 'uploader': 'thoughtworks.wistia.com',
436             },
437         },
438         # Direct download with broken HEAD
439         {
440             'url': 'http://ai-radio.org:8000/radio.opus',
441             'info_dict': {
442                 'id': 'radio',
443                 'ext': 'opus',
444                 'title': 'radio',
445             },
446             'params': {
447                 'skip_download': True,  # infinite live stream
448             },
449             'expected_warnings': [
450                 r'501.*Not Implemented'
451             ],
452         },
453         # Soundcloud embed
454         {
455             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
456             'info_dict': {
457                 'id': '174391317',
458                 'ext': 'mp3',
459                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
460                 'uploader': 'Sophos Security',
461                 'title': 'Chet Chat 171 - Oct 29, 2014',
462                 'upload_date': '20141029',
463             }
464         },
465         # Livestream embed
466         {
467             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
468             'info_dict': {
469                 'id': '67864563',
470                 'ext': 'flv',
471                 'upload_date': '20141112',
472                 'title': 'Rosetta #CometLanding webcast HL 10',
473             }
474         },
475         # LazyYT
476         {
477             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
478             'info_dict': {
479                 'id': '1986',
480                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
481             },
482             'playlist_mincount': 2,
483         },
484         # Direct link with incorrect MIME type
485         {
486             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
487             'md5': '4ccbebe5f36706d85221f204d7eb5913',
488             'info_dict': {
489                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
490                 'id': '5_Lennart_Poettering_-_Systemd',
491                 'ext': 'webm',
492                 'title': '5_Lennart_Poettering_-_Systemd',
493                 'upload_date': '20141120',
494             },
495             'expected_warnings': [
496                 'URL could be a direct video link, returning it as such.'
497             ]
498         },
499         # Cinchcast embed
500         {
501             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
502             'info_dict': {
503                 'id': '7141703',
504                 'ext': 'mp3',
505                 'upload_date': '20141126',
506                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
507             }
508         },
509         # Cinerama player
510         {
511             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
512             'info_dict': {
513                 'id': '730m_DandD_1901_512k',
514                 'ext': 'mp4',
515                 'uploader': 'www.abc.net.au',
516                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
517             }
518         },
519         # embedded viddler video
520         {
521             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
522             'info_dict': {
523                 'id': '4d03aad9',
524                 'ext': 'mp4',
525                 'uploader': 'deadspin',
526                 'title': 'WALL-TO-GORTAT',
527                 'timestamp': 1422285291,
528                 'upload_date': '20150126',
529             },
530             'add_ie': ['Viddler'],
531         },
532         # Libsyn embed
533         {
534             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
535             'info_dict': {
536                 'id': '3377616',
537                 'ext': 'mp3',
538                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
539                 'description': 'md5:601cb790edd05908957dae8aaa866465',
540                 'upload_date': '20150220',
541             },
542         },
543         # jwplayer YouTube
544         {
545             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
546             'info_dict': {
547                 'id': 'Mrj4DVp2zeA',
548                 'ext': 'mp4',
549                 'upload_date': '20150212',
550                 'uploader': 'The National Archives UK',
551                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
552                 'uploader_id': 'NationalArchives08',
553                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
554             },
555         },
556         # rtl.nl embed
557         {
558             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
559             'playlist_mincount': 5,
560             'info_dict': {
561                 'id': 'aanslagen-kopenhagen',
562                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
563             }
564         },
565         # Zapiks embed
566         {
567             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
568             'info_dict': {
569                 'id': '118046',
570                 'ext': 'mp4',
571                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
572             }
573         },
574         # Kaltura embed
575         {
576             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
577             'info_dict': {
578                 'id': '1_eergr3h1',
579                 'ext': 'mp4',
580                 'upload_date': '20150226',
581                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
582                 'timestamp': int,
583                 'title': 'John Carlson Postgame 2/25/15',
584             },
585         },
586         # Eagle.Platform embed (generic URL)
587         {
588             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
589             'info_dict': {
590                 'id': '227304',
591                 'ext': 'mp4',
592                 'title': 'Навальный вышел на свободу',
593                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
594                 'thumbnail': 're:^https?://.*\.jpg$',
595                 'duration': 87,
596                 'view_count': int,
597                 'age_limit': 0,
598             },
599         },
600         # ClipYou (Eagle.Platform) embed (custom URL)
601         {
602             'url': 'http://muz-tv.ru/play/7129/',
603             'info_dict': {
604                 'id': '12820',
605                 'ext': 'mp4',
606                 'title': "'O Sole Mio",
607                 'thumbnail': 're:^https?://.*\.jpg$',
608                 'duration': 216,
609                 'view_count': int,
610             },
611         },
612         # Pladform embed
613         {
614             'url': 'http://muz-tv.ru/kinozal/view/7400/',
615             'info_dict': {
616                 'id': '100183293',
617                 'ext': 'mp4',
618                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
619                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
620                 'thumbnail': 're:^https?://.*\.jpg$',
621                 'duration': 694,
622                 'age_limit': 0,
623             },
624         },
625         # 5min embed
626         {
627             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
628             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
629             'info_dict': {
630                 'id': '518726732',
631                 'ext': 'mp4',
632                 'title': 'Facebook Creates "On This Day" | Crunch Report',
633             },
634         },
635         # RSS feed with enclosure
636         {
637             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
638             'info_dict': {
639                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
640                 'ext': 'm4v',
641                 'upload_date': '20150228',
642                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
643             }
644         },
645         # NBC Sports vplayer embed
646         {
647             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
648             'info_dict': {
649                 'id': 'ln7x1qSThw4k',
650                 'ext': 'flv',
651                 'title': "PFT Live: New leader in the 'new-look' defense",
652                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
653             },
654         },
655         # UDN embed
656         {
657             'url': 'http://www.udn.com/news/story/7314/822787',
658             'md5': 'de06b4c90b042c128395a88f0384817e',
659             'info_dict': {
660                 'id': '300040',
661                 'ext': 'mp4',
662                 'title': '生物老師男變女 全校挺"做自己"',
663                 'thumbnail': 're:^https?://.*\.jpg$',
664             }
665         }
666     ]
667
668     def report_following_redirect(self, new_url):
669         """Report information extraction."""
670         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
671
672     def _extract_rss(self, url, video_id, doc):
673         playlist_title = doc.find('./channel/title').text
674         playlist_desc_el = doc.find('./channel/description')
675         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
676
677         entries = []
678         for it in doc.findall('./channel/item'):
679             next_url = xpath_text(it, 'link', fatal=False)
680             if not next_url:
681                 enclosure_nodes = it.findall('./enclosure')
682                 for e in enclosure_nodes:
683                     next_url = e.attrib.get('url')
684                     if next_url:
685                         break
686
687             if not next_url:
688                 continue
689
690             entries.append({
691                 '_type': 'url',
692                 'url': next_url,
693                 'title': it.find('title').text,
694             })
695
696         return {
697             '_type': 'playlist',
698             'id': url,
699             'title': playlist_title,
700             'description': playlist_desc,
701             'entries': entries,
702         }
703
704     def _extract_camtasia(self, url, video_id, webpage):
705         """ Returns None if no camtasia video can be found. """
706
707         camtasia_cfg = self._search_regex(
708             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
709             webpage, 'camtasia configuration file', default=None)
710         if camtasia_cfg is None:
711             return None
712
713         title = self._html_search_meta('DC.title', webpage, fatal=True)
714
715         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
716         camtasia_cfg = self._download_xml(
717             camtasia_url, video_id,
718             note='Downloading camtasia configuration',
719             errnote='Failed to download camtasia configuration')
720         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
721
722         entries = []
723         for n in fileset_node.getchildren():
724             url_n = n.find('./uri')
725             if url_n is None:
726                 continue
727
728             entries.append({
729                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
730                 'title': '%s - %s' % (title, n.tag),
731                 'url': compat_urlparse.urljoin(url, url_n.text),
732                 'duration': float_or_none(n.find('./duration').text),
733             })
734
735         return {
736             '_type': 'playlist',
737             'entries': entries,
738             'title': title,
739         }
740
741     def _real_extract(self, url):
742         if url.startswith('//'):
743             return {
744                 '_type': 'url',
745                 'url': self.http_scheme() + url,
746             }
747
748         parsed_url = compat_urlparse.urlparse(url)
749         if not parsed_url.scheme:
750             default_search = self._downloader.params.get('default_search')
751             if default_search is None:
752                 default_search = 'fixup_error'
753
754             if default_search in ('auto', 'auto_warning', 'fixup_error'):
755                 if '/' in url:
756                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
757                     return self.url_result('http://' + url)
758                 elif default_search != 'fixup_error':
759                     if default_search == 'auto_warning':
760                         if re.match(r'^(?:url|URL)$', url):
761                             raise ExtractorError(
762                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
763                                 expected=True)
764                         else:
765                             self._downloader.report_warning(
766                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
767                     return self.url_result('ytsearch:' + url)
768
769             if default_search in ('error', 'fixup_error'):
770                 raise ExtractorError(
771                     '%r is not a valid URL. '
772                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
773                     % (url, url), expected=True)
774             else:
775                 if ':' not in default_search:
776                     default_search += ':'
777                 return self.url_result(default_search + url)
778
779         url, smuggled_data = unsmuggle_url(url)
780         force_videoid = None
781         is_intentional = smuggled_data and smuggled_data.get('to_generic')
782         if smuggled_data and 'force_videoid' in smuggled_data:
783             force_videoid = smuggled_data['force_videoid']
784             video_id = force_videoid
785         else:
786             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
787
788         self.to_screen('%s: Requesting header' % video_id)
789
790         head_req = HEADRequest(url)
791         head_response = self._request_webpage(
792             head_req, video_id,
793             note=False, errnote='Could not send HEAD request to %s' % url,
794             fatal=False)
795
796         if head_response is not False:
797             # Check for redirect
798             new_url = head_response.geturl()
799             if url != new_url:
800                 self.report_following_redirect(new_url)
801                 if force_videoid:
802                     new_url = smuggle_url(
803                         new_url, {'force_videoid': force_videoid})
804                 return self.url_result(new_url)
805
806         full_response = None
807         if head_response is False:
808             full_response = self._request_webpage(url, video_id)
809             head_response = full_response
810
811         # Check for direct link to a video
812         content_type = head_response.headers.get('Content-Type', '')
813         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
814         if m:
815             upload_date = unified_strdate(
816                 head_response.headers.get('Last-Modified'))
817             return {
818                 'id': video_id,
819                 'title': os.path.splitext(url_basename(url))[0],
820                 'direct': True,
821                 'formats': [{
822                     'format_id': m.group('format_id'),
823                     'url': url,
824                     'vcodec': 'none' if m.group('type') == 'audio' else None
825                 }],
826                 'upload_date': upload_date,
827             }
828
829         if not self._downloader.params.get('test', False) and not is_intentional:
830             self._downloader.report_warning('Falling back on generic information extractor.')
831
832         if not full_response:
833             full_response = self._request_webpage(url, video_id)
834
835         # Maybe it's a direct link to a video?
836         # Be careful not to download the whole thing!
837         first_bytes = full_response.read(512)
838         if not is_html(first_bytes):
839             self._downloader.report_warning(
840                 'URL could be a direct video link, returning it as such.')
841             upload_date = unified_strdate(
842                 head_response.headers.get('Last-Modified'))
843             return {
844                 'id': video_id,
845                 'title': os.path.splitext(url_basename(url))[0],
846                 'direct': True,
847                 'url': url,
848                 'upload_date': upload_date,
849             }
850
851         webpage = self._webpage_read_content(
852             full_response, url, video_id, prefix=first_bytes)
853
854         self.report_extraction(video_id)
855
856         # Is it an RSS feed?
857         try:
858             doc = parse_xml(webpage)
859             if doc.tag == 'rss':
860                 return self._extract_rss(url, video_id, doc)
861         except compat_xml_parse_error:
862             pass
863
864         # Is it a Camtasia project?
865         camtasia_res = self._extract_camtasia(url, video_id, webpage)
866         if camtasia_res is not None:
867             return camtasia_res
868
869         # Sometimes embedded video player is hidden behind percent encoding
870         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
871         # Unescaping the whole page allows to handle those cases in a generic way
872         webpage = compat_urllib_parse.unquote(webpage)
873
874         # it's tempting to parse this further, but you would
875         # have to take into account all the variations like
876         #   Video Title - Site Name
877         #   Site Name | Video Title
878         #   Video Title - Tagline | Site Name
879         # and so on and so forth; it's just not practical
880         video_title = self._html_search_regex(
881             r'(?s)<title>(.*?)</title>', webpage, 'video title',
882             default='video')
883
884         # Try to detect age limit automatically
885         age_limit = self._rta_search(webpage)
886         # And then there are the jokers who advertise that they use RTA,
887         # but actually don't.
888         AGE_LIMIT_MARKERS = [
889             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
890         ]
891         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
892             age_limit = 18
893
894         # video uploader is domain name
895         video_uploader = self._search_regex(
896             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
897
898         # Helper method
899         def _playlist_from_matches(matches, getter=None, ie=None):
900             urlrs = orderedSet(
901                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
902                 for m in matches)
903             return self.playlist_result(
904                 urlrs, playlist_id=video_id, playlist_title=video_title)
905
906         # Look for BrightCove:
907         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
908         if bc_urls:
909             self.to_screen('Brightcove video detected.')
910             entries = [{
911                 '_type': 'url',
912                 'url': smuggle_url(bc_url, {'Referer': url}),
913                 'ie_key': 'Brightcove'
914             } for bc_url in bc_urls]
915
916             return {
917                 '_type': 'playlist',
918                 'title': video_title,
919                 'id': video_id,
920                 'entries': entries,
921             }
922
923         # Look for embedded rtl.nl player
924         matches = re.findall(
925             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
926             webpage)
927         if matches:
928             return _playlist_from_matches(matches, ie='RtlNl')
929
930         # Look for embedded (iframe) Vimeo player
931         mobj = re.search(
932             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
933         if mobj:
934             player_url = unescapeHTML(mobj.group('url'))
935             surl = smuggle_url(player_url, {'Referer': url})
936             return self.url_result(surl)
937         # Look for embedded (swf embed) Vimeo player
938         mobj = re.search(
939             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
940         if mobj:
941             return self.url_result(mobj.group(1))
942
943         # Look for embedded YouTube player
944         matches = re.findall(r'''(?x)
945             (?:
946                 <iframe[^>]+?src=|
947                 data-video-url=|
948                 <embed[^>]+?src=|
949                 embedSWF\(?:\s*|
950                 new\s+SWFObject\(
951             )
952             (["\'])
953                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
954                 (?:embed|v|p)/.+?)
955             \1''', webpage)
956         if matches:
957             return _playlist_from_matches(
958                 matches, lambda m: unescapeHTML(m[1]))
959
960         # Look for lazyYT YouTube embed
961         matches = re.findall(
962             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
963         if matches:
964             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
965
966         # Look for embedded Dailymotion player
967         matches = re.findall(
968             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
969         if matches:
970             return _playlist_from_matches(
971                 matches, lambda m: unescapeHTML(m[1]))
972
973         # Look for embedded Dailymotion playlist player (#3822)
974         m = re.search(
975             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
976         if m:
977             playlists = re.findall(
978                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
979             if playlists:
980                 return _playlist_from_matches(
981                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
982
983         # Look for embedded Wistia player
984         match = re.search(
985             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
986         if match:
987             embed_url = self._proto_relative_url(
988                 unescapeHTML(match.group('url')))
989             return {
990                 '_type': 'url_transparent',
991                 'url': embed_url,
992                 'ie_key': 'Wistia',
993                 'uploader': video_uploader,
994                 'title': video_title,
995                 'id': video_id,
996             }
997
998         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
999         if match:
1000             return {
1001                 '_type': 'url_transparent',
1002                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1003                 'ie_key': 'Wistia',
1004                 'uploader': video_uploader,
1005                 'title': video_title,
1006                 'id': match.group('id')
1007             }
1008
1009         # Look for embedded blip.tv player
1010         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
1011         if mobj:
1012             return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
1013         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
1014         if mobj:
1015             return self.url_result(mobj.group(1), 'BlipTV')
1016
1017         # Look for embedded condenast player
1018         matches = re.findall(
1019             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1020             webpage)
1021         if matches:
1022             return {
1023                 '_type': 'playlist',
1024                 'entries': [{
1025                     '_type': 'url',
1026                     'ie_key': 'CondeNast',
1027                     'url': ma,
1028                 } for ma in matches],
1029                 'title': video_title,
1030                 'id': video_id,
1031             }
1032
1033         # Look for Bandcamp pages with custom domain
1034         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1035         if mobj is not None:
1036             burl = unescapeHTML(mobj.group(1))
1037             # Don't set the extractor because it can be a track url or an album
1038             return self.url_result(burl)
1039
1040         # Look for embedded Vevo player
1041         mobj = re.search(
1042             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1043         if mobj is not None:
1044             return self.url_result(mobj.group('url'))
1045
1046         # Look for embedded Viddler player
1047         mobj = re.search(
1048             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1049             webpage)
1050         if mobj is not None:
1051             return self.url_result(mobj.group('url'))
1052
1053         # Look for NYTimes player
1054         mobj = re.search(
1055             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1056             webpage)
1057         if mobj is not None:
1058             return self.url_result(mobj.group('url'))
1059
1060         # Look for Libsyn player
1061         mobj = re.search(
1062             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1063         if mobj is not None:
1064             return self.url_result(mobj.group('url'))
1065
1066         # Look for Ooyala videos
1067         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1068                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1069                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
1070         if mobj is not None:
1071             return OoyalaIE._build_url_result(mobj.group('ec'))
1072
1073         # Look for multiple Ooyala embeds on SBN network websites
1074         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1075         if mobj is not None:
1076             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1077             if embeds:
1078                 return _playlist_from_matches(
1079                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1080
1081         # Look for Aparat videos
1082         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1083         if mobj is not None:
1084             return self.url_result(mobj.group(1), 'Aparat')
1085
1086         # Look for MPORA videos
1087         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1088         if mobj is not None:
1089             return self.url_result(mobj.group(1), 'Mpora')
1090
1091         # Look for embedded NovaMov-based player
1092         mobj = re.search(
1093             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1094                     (?P<url>http://(?:(?:embed|www)\.)?
1095                         (?:novamov\.com|
1096                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1097                            videoweed\.(?:es|com)|
1098                            movshare\.(?:net|sx|ag)|
1099                            divxstage\.(?:eu|net|ch|co|at|ag))
1100                         /embed\.php.+?)\1''', webpage)
1101         if mobj is not None:
1102             return self.url_result(mobj.group('url'))
1103
1104         # Look for embedded Facebook player
1105         mobj = re.search(
1106             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1107         if mobj is not None:
1108             return self.url_result(mobj.group('url'), 'Facebook')
1109
1110         # Look for embedded VK player
1111         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1112         if mobj is not None:
1113             return self.url_result(mobj.group('url'), 'VK')
1114
1115         # Look for embedded ivi player
1116         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1117         if mobj is not None:
1118             return self.url_result(mobj.group('url'), 'Ivi')
1119
1120         # Look for embedded Huffington Post player
1121         mobj = re.search(
1122             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1123         if mobj is not None:
1124             return self.url_result(mobj.group('url'), 'HuffPost')
1125
1126         # Look for embed.ly
1127         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1128         if mobj is not None:
1129             return self.url_result(mobj.group('url'))
1130         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1131         if mobj is not None:
1132             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1133
1134         # Look for funnyordie embed
1135         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1136         if matches:
1137             return _playlist_from_matches(
1138                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1139
1140         # Look for BBC iPlayer embed
1141         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1142         if matches:
1143             return _playlist_from_matches(matches, ie='BBCCoUk')
1144
1145         # Look for embedded RUTV player
1146         rutv_url = RUTVIE._extract_url(webpage)
1147         if rutv_url:
1148             return self.url_result(rutv_url, 'RUTV')
1149
1150         # Look for embedded TED player
1151         mobj = re.search(
1152             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1153         if mobj is not None:
1154             return self.url_result(mobj.group('url'), 'TED')
1155
1156         # Look for embedded Ustream videos
1157         mobj = re.search(
1158             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1159         if mobj is not None:
1160             return self.url_result(mobj.group('url'), 'Ustream')
1161
1162         # Look for embedded arte.tv player
1163         mobj = re.search(
1164             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1165             webpage)
1166         if mobj is not None:
1167             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1168
1169         # Look for embedded smotri.com player
1170         smotri_url = SmotriIE._extract_url(webpage)
1171         if smotri_url:
1172             return self.url_result(smotri_url, 'Smotri')
1173
1174         # Look for embeded soundcloud player
1175         mobj = re.search(
1176             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1177             webpage)
1178         if mobj is not None:
1179             url = unescapeHTML(mobj.group('url'))
1180             return self.url_result(url)
1181
1182         # Look for embedded vulture.com player
1183         mobj = re.search(
1184             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1185             webpage)
1186         if mobj is not None:
1187             url = unescapeHTML(mobj.group('url'))
1188             return self.url_result(url, ie='Vulture')
1189
1190         # Look for embedded mtvservices player
1191         mobj = re.search(
1192             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1193             webpage)
1194         if mobj is not None:
1195             url = unescapeHTML(mobj.group('url'))
1196             return self.url_result(url, ie='MTVServicesEmbedded')
1197
1198         # Look for embedded yahoo player
1199         mobj = re.search(
1200             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1201             webpage)
1202         if mobj is not None:
1203             return self.url_result(mobj.group('url'), 'Yahoo')
1204
1205         # Look for embedded sbs.com.au player
1206         mobj = re.search(
1207             r'''(?x)
1208             (?:
1209                 <meta\s+property="og:video"\s+content=|
1210                 <iframe[^>]+?src=
1211             )
1212             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1213             webpage)
1214         if mobj is not None:
1215             return self.url_result(mobj.group('url'), 'SBS')
1216
1217         # Look for embedded Cinchcast player
1218         mobj = re.search(
1219             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1220             webpage)
1221         if mobj is not None:
1222             return self.url_result(mobj.group('url'), 'Cinchcast')
1223
1224         mobj = re.search(
1225             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1226             webpage)
1227         if mobj is not None:
1228             return self.url_result(mobj.group('url'), 'MLB')
1229
1230         mobj = re.search(
1231             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1232             webpage)
1233         if mobj is not None:
1234             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1235
1236         mobj = re.search(
1237             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1238             webpage)
1239         if mobj is not None:
1240             return self.url_result(mobj.group('url'), 'Livestream')
1241
1242         # Look for Zapiks embed
1243         mobj = re.search(
1244             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1245         if mobj is not None:
1246             return self.url_result(mobj.group('url'), 'Zapiks')
1247
1248         # Look for Kaltura embeds
1249         mobj = re.search(
1250             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1251         if mobj is not None:
1252             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1253
1254         # Look for Eagle.Platform embeds
1255         mobj = re.search(
1256             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1257         if mobj is not None:
1258             return self.url_result(mobj.group('url'), 'EaglePlatform')
1259
1260         # Look for ClipYou (uses Eagle.Platform) embeds
1261         mobj = re.search(
1262             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1263         if mobj is not None:
1264             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1265
1266         # Look for Pladform embeds
1267         mobj = re.search(
1268             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1269         if mobj is not None:
1270             return self.url_result(mobj.group('url'), 'Pladform')
1271
1272         # Look for 5min embeds
1273         mobj = re.search(
1274             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1275         if mobj is not None:
1276             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1277
1278         # Look for NBC Sports VPlayer embeds
1279         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1280         if nbc_sports_url:
1281             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1282
1283         # Look for UDN embeds
1284         mobj = re.search(
1285             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1286         if mobj is not None:
1287             return self.url_result(
1288                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1289
1290         def check_video(vurl):
1291             if YoutubeIE.suitable(vurl):
1292                 return True
1293             vpath = compat_urlparse.urlparse(vurl).path
1294             vext = determine_ext(vpath)
1295             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1296
1297         def filter_video(urls):
1298             return list(filter(check_video, urls))
1299
1300         # Start with something easy: JW Player in SWFObject
1301         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1302         if not found:
1303             # Look for gorilla-vid style embedding
1304             found = filter_video(re.findall(r'''(?sx)
1305                 (?:
1306                     jw_plugins|
1307                     JWPlayerOptions|
1308                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1309                 )
1310                 .*?
1311                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1312         if not found:
1313             # Broaden the search a little bit
1314             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1315         if not found:
1316             # Broaden the findall a little bit: JWPlayer JS loader
1317             found = filter_video(re.findall(
1318                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1319         if not found:
1320             # Flow player
1321             found = filter_video(re.findall(r'''(?xs)
1322                 flowplayer\("[^"]+",\s*
1323                     \{[^}]+?\}\s*,
1324                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1325                         ["']?url["']?\s*:\s*["']([^"']+)["']
1326             ''', webpage))
1327         if not found:
1328             # Cinerama player
1329             found = re.findall(
1330                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1331         if not found:
1332             # Try to find twitter cards info
1333             found = filter_video(re.findall(
1334                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1335         if not found:
1336             # We look for Open Graph info:
1337             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1338             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1339             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1340             if m_video_type is not None:
1341                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1342         if not found:
1343             # HTML5 video
1344             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1345         if not found:
1346             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1347             found = re.search(
1348                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1349                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1350                 webpage)
1351             if not found:
1352                 # Look also in Refresh HTTP header
1353                 refresh_header = head_response.headers.get('Refresh')
1354                 if refresh_header:
1355                     found = re.search(REDIRECT_REGEX, refresh_header)
1356             if found:
1357                 new_url = found.group(1)
1358                 self.report_following_redirect(new_url)
1359                 return {
1360                     '_type': 'url',
1361                     'url': new_url,
1362                 }
1363         if not found:
1364             raise UnsupportedError(url)
1365
1366         entries = []
1367         for video_url in found:
1368             video_url = compat_urlparse.urljoin(url, video_url)
1369             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1370
1371             # Sometimes, jwplayer extraction will result in a YouTube URL
1372             if YoutubeIE.suitable(video_url):
1373                 entries.append(self.url_result(video_url, 'Youtube'))
1374                 continue
1375
1376             # here's a fun little line of code for you:
1377             video_id = os.path.splitext(video_id)[0]
1378
1379             entries.append({
1380                 'id': video_id,
1381                 'url': video_url,
1382                 'uploader': video_uploader,
1383                 'title': video_title,
1384                 'age_limit': age_limit,
1385             })
1386
1387         if len(entries) == 1:
1388             return entries[0]
1389         else:
1390             for num, e in enumerate(entries, start=1):
1391                 # 'url' results don't have a title
1392                 if e.get('title') is not None:
1393                     e['title'] = '%s (%d)' % (e['title'], num)
1394             return {
1395                 '_type': 'playlist',
1396                 'entries': entries,
1397             }