[generic] Add test for playwire embed (#5430)
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urlparse,
13     compat_xml_parse_error,
14 )
15 from ..utils import (
16     determine_ext,
17     ExtractorError,
18     float_or_none,
19     HEADRequest,
20     is_html,
21     orderedSet,
22     parse_xml,
23     smuggle_url,
24     unescapeHTML,
25     unified_strdate,
26     unsmuggle_url,
27     UnsupportedError,
28     url_basename,
29     xpath_text,
30 )
31 from .brightcove import BrightcoveIE
32 from .nbc import NBCSportsVPlayerIE
33 from .ooyala import OoyalaIE
34 from .rutv import RUTVIE
35 from .smotri import SmotriIE
36 from .condenast import CondeNastIE
37 from .udn import UDNEmbedIE
38
39
40 class GenericIE(InfoExtractor):
41     IE_DESC = 'Generic downloader that works on some sites'
42     _VALID_URL = r'.*'
43     IE_NAME = 'generic'
44     _TESTS = [
45         {
46             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
47             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
48             'info_dict': {
49                 'id': '13601338388002',
50                 'ext': 'mp4',
51                 'uploader': 'www.hodiho.fr',
52                 'title': 'R\u00e9gis plante sa Jeep',
53             }
54         },
55         # bandcamp page with custom domain
56         {
57             'add_ie': ['Bandcamp'],
58             'url': 'http://bronyrock.com/track/the-pony-mash',
59             'info_dict': {
60                 'id': '3235767654',
61                 'ext': 'mp3',
62                 'title': 'The Pony Mash',
63                 'uploader': 'M_Pallante',
64             },
65             'skip': 'There is a limit of 200 free downloads / month for the test song',
66         },
67         # embedded brightcove video
68         # it also tests brightcove videos that need to set the 'Referer' in the
69         # http requests
70         {
71             'add_ie': ['Brightcove'],
72             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
73             'info_dict': {
74                 'id': '2765128793001',
75                 'ext': 'mp4',
76                 'title': 'Le cours de bourse : l’analyse technique',
77                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
78                 'uploader': 'BFM BUSINESS',
79             },
80             'params': {
81                 'skip_download': True,
82             },
83         },
84         {
85             # https://github.com/rg3/youtube-dl/issues/2253
86             'url': 'http://bcove.me/i6nfkrc3',
87             'md5': '0ba9446db037002366bab3b3eb30c88c',
88             'info_dict': {
89                 'id': '3101154703001',
90                 'ext': 'mp4',
91                 'title': 'Still no power',
92                 'uploader': 'thestar.com',
93                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
94             },
95             'add_ie': ['Brightcove'],
96         },
97         {
98             'url': 'http://www.championat.com/video/football/v/87/87499.html',
99             'md5': 'fb973ecf6e4a78a67453647444222983',
100             'info_dict': {
101                 'id': '3414141473001',
102                 'ext': 'mp4',
103                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
104                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
105                 'uploader': 'Championat',
106             },
107         },
108         {
109             # https://github.com/rg3/youtube-dl/issues/3541
110             'add_ie': ['Brightcove'],
111             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
112             'info_dict': {
113                 'id': '3866516442001',
114                 'ext': 'mp4',
115                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
116                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
117                 'uploader': 'SBS Broadcasting',
118             },
119             'skip': 'Restricted to Netherlands',
120             'params': {
121                 'skip_download': True,  # m3u8 download
122             },
123         },
124         # Direct link to a video
125         {
126             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
127             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
128             'info_dict': {
129                 'id': 'trailer',
130                 'ext': 'mp4',
131                 'title': 'trailer',
132                 'upload_date': '20100513',
133             }
134         },
135         # ooyala video
136         {
137             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
138             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
139             'info_dict': {
140                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
141                 'ext': 'mp4',
142                 'title': '2cc213299525360.mov',  # that's what we get
143             },
144             'add_ie': ['Ooyala'],
145         },
146         # multiple ooyala embeds on SBN network websites
147         {
148             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
149             'info_dict': {
150                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
151                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
152             },
153             'playlist_mincount': 3,
154             'params': {
155                 'skip_download': True,
156             },
157             'add_ie': ['Ooyala'],
158         },
159         # google redirect
160         {
161             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
162             'info_dict': {
163                 'id': 'cmQHVoWB5FY',
164                 'ext': 'mp4',
165                 'upload_date': '20130224',
166                 'uploader_id': 'TheVerge',
167                 'description': 're:^Chris Ziegler takes a look at the\.*',
168                 'uploader': 'The Verge',
169                 'title': 'First Firefox OS phones side-by-side',
170             },
171             'params': {
172                 'skip_download': False,
173             }
174         },
175         # embed.ly video
176         {
177             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
178             'info_dict': {
179                 'id': '9ODmcdjQcHQ',
180                 'ext': 'mp4',
181                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
182                 'upload_date': '20140225',
183                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
184                 'uploader': 'Tested',
185                 'uploader_id': 'testedcom',
186             },
187             # No need to test YoutubeIE here
188             'params': {
189                 'skip_download': True,
190             },
191         },
192         # funnyordie embed
193         {
194             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
195             'info_dict': {
196                 'id': '18e820ec3f',
197                 'ext': 'mp4',
198                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
199                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
200             },
201         },
202         # BBC iPlayer embeds
203         {
204             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
205             'info_dict': {
206                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
207             },
208             'playlist_mincount': 18,
209         },
210         # RUTV embed
211         {
212             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
213             'info_dict': {
214                 'id': '776940',
215                 'ext': 'mp4',
216                 'title': 'Охотское море стало целиком российским',
217                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
218             },
219             'params': {
220                 # m3u8 download
221                 'skip_download': True,
222             },
223         },
224         # Embedded TED video
225         {
226             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
227             'md5': '65fdff94098e4a607385a60c5177c638',
228             'info_dict': {
229                 'id': '1969',
230                 'ext': 'mp4',
231                 'title': 'Hidden miracles of the natural world',
232                 'uploader': 'Louie Schwartzberg',
233                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
234             }
235         },
236         # Embedded Ustream video
237         {
238             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
239             'md5': '27b99cdb639c9b12a79bca876a073417',
240             'info_dict': {
241                 'id': '45734260',
242                 'ext': 'flv',
243                 'uploader': 'AU SPA:  The NSA and Privacy',
244                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
245             }
246         },
247         # nowvideo embed hidden behind percent encoding
248         {
249             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
250             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
251             'info_dict': {
252                 'id': '06e53103ca9aa',
253                 'ext': 'flv',
254                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
255                 'description': 'No description',
256             },
257         },
258         # arte embed
259         {
260             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
261             'md5': '7653032cbb25bf6c80d80f217055fa43',
262             'info_dict': {
263                 'id': '048195-004_PLUS7-F',
264                 'ext': 'flv',
265                 'title': 'X:enius',
266                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
267                 'upload_date': '20140320',
268             },
269             'params': {
270                 'skip_download': 'Requires rtmpdump'
271             }
272         },
273         # Condé Nast embed
274         {
275             'url': 'http://www.wired.com/2014/04/honda-asimo/',
276             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
277             'info_dict': {
278                 'id': '53501be369702d3275860000',
279                 'ext': 'mp4',
280                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
281             }
282         },
283         # Dailymotion embed
284         {
285             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
286             'md5': '441aeeb82eb72c422c7f14ec533999cd',
287             'info_dict': {
288                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
289                 'ext': 'mp4',
290                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
291                 'uploader': 'Spi0n',
292             },
293             'add_ie': ['Dailymotion'],
294         },
295         # YouTube embed
296         {
297             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
298             'info_dict': {
299                 'id': 'FXRb4ykk4S0',
300                 'ext': 'mp4',
301                 'title': 'The NBL Auction 2014',
302                 'uploader': 'BADMINTON England',
303                 'uploader_id': 'BADMINTONEvents',
304                 'upload_date': '20140603',
305                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
306             },
307             'add_ie': ['Youtube'],
308             'params': {
309                 'skip_download': True,
310             }
311         },
312         # MTVServices embed
313         {
314             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
315             'md5': '35727f82f58c76d996fc188f9755b0d5',
316             'info_dict': {
317                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
318                 'ext': 'mp4',
319                 'title': 'Review',
320                 'description': 'Mario\'s life in the fast lane has never looked so good.',
321             },
322         },
323         # YouTube embed via <data-embed-url="">
324         {
325             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
326             'info_dict': {
327                 'id': '4vAffPZIT44',
328                 'ext': 'mp4',
329                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
330                 'uploader': 'Gameloft',
331                 'uploader_id': 'gameloft',
332                 'upload_date': '20140828',
333                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
334             },
335             'params': {
336                 'skip_download': True,
337             }
338         },
339         # Camtasia studio
340         {
341             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
342             'playlist': [{
343                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
344                 'info_dict': {
345                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
346                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
347                     'ext': 'flv',
348                     'duration': 2235.90,
349                 }
350             }, {
351                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
352                 'info_dict': {
353                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
354                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
355                     'ext': 'flv',
356                     'duration': 2235.93,
357                 }
358             }],
359             'info_dict': {
360                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
361             }
362         },
363         # Flowplayer
364         {
365             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
366             'md5': '9d65602bf31c6e20014319c7d07fba27',
367             'info_dict': {
368                 'id': '5123ea6d5e5a7',
369                 'ext': 'mp4',
370                 'age_limit': 18,
371                 'uploader': 'www.handjobhub.com',
372                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
373             }
374         },
375         # RSS feed
376         {
377             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
378             'info_dict': {
379                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
380                 'title': 'Zero Punctuation',
381                 'description': 're:.*groundbreaking video review series.*'
382             },
383             'playlist_mincount': 11,
384         },
385         # Multiple brightcove videos
386         # https://github.com/rg3/youtube-dl/issues/2283
387         {
388             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
389             'info_dict': {
390                 'id': 'always-never',
391                 'title': 'Always / Never - The New Yorker',
392             },
393             'playlist_count': 3,
394             'params': {
395                 'extract_flat': False,
396                 'skip_download': True,
397             }
398         },
399         # MLB embed
400         {
401             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
402             'md5': '96f09a37e44da40dd083e12d9a683327',
403             'info_dict': {
404                 'id': '33322633',
405                 'ext': 'mp4',
406                 'title': 'Ump changes call to ball',
407                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
408                 'duration': 48,
409                 'timestamp': 1401537900,
410                 'upload_date': '20140531',
411                 'thumbnail': 're:^https?://.*\.jpg$',
412             },
413         },
414         # Wistia embed
415         {
416             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
417             'md5': '8788b683c777a5cf25621eaf286d0c23',
418             'info_dict': {
419                 'id': '1cfaf6b7ea',
420                 'ext': 'mov',
421                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
422                 'duration': 643.0,
423                 'filesize': 182808282,
424                 'uploader': 'education-portal.com',
425             },
426         },
427         {
428             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
429             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
430             'info_dict': {
431                 'id': 'uxjb0lwrcz',
432                 'ext': 'mp4',
433                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
434                 'duration': 1715.0,
435                 'uploader': 'thoughtworks.wistia.com',
436             },
437         },
438         # Direct download with broken HEAD
439         {
440             'url': 'http://ai-radio.org:8000/radio.opus',
441             'info_dict': {
442                 'id': 'radio',
443                 'ext': 'opus',
444                 'title': 'radio',
445             },
446             'params': {
447                 'skip_download': True,  # infinite live stream
448             },
449             'expected_warnings': [
450                 r'501.*Not Implemented'
451             ],
452         },
453         # Soundcloud embed
454         {
455             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
456             'info_dict': {
457                 'id': '174391317',
458                 'ext': 'mp3',
459                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
460                 'uploader': 'Sophos Security',
461                 'title': 'Chet Chat 171 - Oct 29, 2014',
462                 'upload_date': '20141029',
463             }
464         },
465         # Livestream embed
466         {
467             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
468             'info_dict': {
469                 'id': '67864563',
470                 'ext': 'flv',
471                 'upload_date': '20141112',
472                 'title': 'Rosetta #CometLanding webcast HL 10',
473             }
474         },
475         # LazyYT
476         {
477             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
478             'info_dict': {
479                 'id': '1986',
480                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
481             },
482             'playlist_mincount': 2,
483         },
484         # Direct link with incorrect MIME type
485         {
486             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
487             'md5': '4ccbebe5f36706d85221f204d7eb5913',
488             'info_dict': {
489                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
490                 'id': '5_Lennart_Poettering_-_Systemd',
491                 'ext': 'webm',
492                 'title': '5_Lennart_Poettering_-_Systemd',
493                 'upload_date': '20141120',
494             },
495             'expected_warnings': [
496                 'URL could be a direct video link, returning it as such.'
497             ]
498         },
499         # Cinchcast embed
500         {
501             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
502             'info_dict': {
503                 'id': '7141703',
504                 'ext': 'mp3',
505                 'upload_date': '20141126',
506                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
507             }
508         },
509         # Cinerama player
510         {
511             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
512             'info_dict': {
513                 'id': '730m_DandD_1901_512k',
514                 'ext': 'mp4',
515                 'uploader': 'www.abc.net.au',
516                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
517             }
518         },
519         # embedded viddler video
520         {
521             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
522             'info_dict': {
523                 'id': '4d03aad9',
524                 'ext': 'mp4',
525                 'uploader': 'deadspin',
526                 'title': 'WALL-TO-GORTAT',
527                 'timestamp': 1422285291,
528                 'upload_date': '20150126',
529             },
530             'add_ie': ['Viddler'],
531         },
532         # Libsyn embed
533         {
534             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
535             'info_dict': {
536                 'id': '3377616',
537                 'ext': 'mp3',
538                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
539                 'description': 'md5:601cb790edd05908957dae8aaa866465',
540                 'upload_date': '20150220',
541             },
542         },
543         # jwplayer YouTube
544         {
545             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
546             'info_dict': {
547                 'id': 'Mrj4DVp2zeA',
548                 'ext': 'mp4',
549                 'upload_date': '20150212',
550                 'uploader': 'The National Archives UK',
551                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
552                 'uploader_id': 'NationalArchives08',
553                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
554             },
555         },
556         # rtl.nl embed
557         {
558             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
559             'playlist_mincount': 5,
560             'info_dict': {
561                 'id': 'aanslagen-kopenhagen',
562                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
563             }
564         },
565         # Zapiks embed
566         {
567             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
568             'info_dict': {
569                 'id': '118046',
570                 'ext': 'mp4',
571                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
572             }
573         },
574         # Kaltura embed
575         {
576             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
577             'info_dict': {
578                 'id': '1_eergr3h1',
579                 'ext': 'mp4',
580                 'upload_date': '20150226',
581                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
582                 'timestamp': int,
583                 'title': 'John Carlson Postgame 2/25/15',
584             },
585         },
586         # Eagle.Platform embed (generic URL)
587         {
588             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
589             'info_dict': {
590                 'id': '227304',
591                 'ext': 'mp4',
592                 'title': 'Навальный вышел на свободу',
593                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
594                 'thumbnail': 're:^https?://.*\.jpg$',
595                 'duration': 87,
596                 'view_count': int,
597                 'age_limit': 0,
598             },
599         },
600         # ClipYou (Eagle.Platform) embed (custom URL)
601         {
602             'url': 'http://muz-tv.ru/play/7129/',
603             'info_dict': {
604                 'id': '12820',
605                 'ext': 'mp4',
606                 'title': "'O Sole Mio",
607                 'thumbnail': 're:^https?://.*\.jpg$',
608                 'duration': 216,
609                 'view_count': int,
610             },
611         },
612         # Pladform embed
613         {
614             'url': 'http://muz-tv.ru/kinozal/view/7400/',
615             'info_dict': {
616                 'id': '100183293',
617                 'ext': 'mp4',
618                 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
619                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
620                 'thumbnail': 're:^https?://.*\.jpg$',
621                 'duration': 694,
622                 'age_limit': 0,
623             },
624         },
625         # Playwire embed
626         {
627             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
628             'info_dict': {
629                 'id': '3519514',
630                 'ext': 'mp4',
631                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
632                 'thumbnail': 're:^https?://.*\.png$',
633                 'duration': 45.115,
634             },
635         },
636         # 5min embed
637         {
638             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
639             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
640             'info_dict': {
641                 'id': '518726732',
642                 'ext': 'mp4',
643                 'title': 'Facebook Creates "On This Day" | Crunch Report',
644             },
645         },
646         # RSS feed with enclosure
647         {
648             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
649             'info_dict': {
650                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
651                 'ext': 'm4v',
652                 'upload_date': '20150228',
653                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
654             }
655         },
656         # Crooks and Liars embed
657         {
658             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
659             'info_dict': {
660                 'id': '8RUoRhRi',
661                 'ext': 'mp4',
662                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
663                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
664                 'timestamp': 1428207000,
665                 'upload_date': '20150405',
666                 'uploader': 'Heather',
667             },
668         },
669         # Crooks and Liars external embed
670         {
671             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
672             'info_dict': {
673                 'id': 'MTE3MjUtMzQ2MzA',
674                 'ext': 'mp4',
675                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
676                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
677                 'timestamp': 1265032391,
678                 'upload_date': '20100201',
679                 'uploader': 'Heather',
680             },
681         },
682         # NBC Sports vplayer embed
683         {
684             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
685             'info_dict': {
686                 'id': 'ln7x1qSThw4k',
687                 'ext': 'flv',
688                 'title': "PFT Live: New leader in the 'new-look' defense",
689                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
690             },
691         },
692         # UDN embed
693         {
694             'url': 'http://www.udn.com/news/story/7314/822787',
695             'md5': 'fd2060e988c326991037b9aff9df21a6',
696             'info_dict': {
697                 'id': '300346',
698                 'ext': 'mp4',
699                 'title': '中一中男師變性 全校師生力挺',
700                 'thumbnail': 're:^https?://.*\.jpg$',
701             }
702         },
703         # Ooyala embed
704         {
705             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
706             'info_dict': {
707                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
708                 'ext': 'mp4',
709                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
710                 'title': 'This is what separates the Excel masters from the wannabes',
711             },
712             'params': {
713                 # m3u8 downloads
714                 'skip_download': True,
715             }
716         }
717     ]
718
719     def report_following_redirect(self, new_url):
720         """Report information extraction."""
721         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
722
723     def _extract_rss(self, url, video_id, doc):
724         playlist_title = doc.find('./channel/title').text
725         playlist_desc_el = doc.find('./channel/description')
726         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
727
728         entries = []
729         for it in doc.findall('./channel/item'):
730             next_url = xpath_text(it, 'link', fatal=False)
731             if not next_url:
732                 enclosure_nodes = it.findall('./enclosure')
733                 for e in enclosure_nodes:
734                     next_url = e.attrib.get('url')
735                     if next_url:
736                         break
737
738             if not next_url:
739                 continue
740
741             entries.append({
742                 '_type': 'url',
743                 'url': next_url,
744                 'title': it.find('title').text,
745             })
746
747         return {
748             '_type': 'playlist',
749             'id': url,
750             'title': playlist_title,
751             'description': playlist_desc,
752             'entries': entries,
753         }
754
755     def _extract_camtasia(self, url, video_id, webpage):
756         """ Returns None if no camtasia video can be found. """
757
758         camtasia_cfg = self._search_regex(
759             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
760             webpage, 'camtasia configuration file', default=None)
761         if camtasia_cfg is None:
762             return None
763
764         title = self._html_search_meta('DC.title', webpage, fatal=True)
765
766         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
767         camtasia_cfg = self._download_xml(
768             camtasia_url, video_id,
769             note='Downloading camtasia configuration',
770             errnote='Failed to download camtasia configuration')
771         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
772
773         entries = []
774         for n in fileset_node.getchildren():
775             url_n = n.find('./uri')
776             if url_n is None:
777                 continue
778
779             entries.append({
780                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
781                 'title': '%s - %s' % (title, n.tag),
782                 'url': compat_urlparse.urljoin(url, url_n.text),
783                 'duration': float_or_none(n.find('./duration').text),
784             })
785
786         return {
787             '_type': 'playlist',
788             'entries': entries,
789             'title': title,
790         }
791
792     def _real_extract(self, url):
793         if url.startswith('//'):
794             return {
795                 '_type': 'url',
796                 'url': self.http_scheme() + url,
797             }
798
799         parsed_url = compat_urlparse.urlparse(url)
800         if not parsed_url.scheme:
801             default_search = self._downloader.params.get('default_search')
802             if default_search is None:
803                 default_search = 'fixup_error'
804
805             if default_search in ('auto', 'auto_warning', 'fixup_error'):
806                 if '/' in url:
807                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
808                     return self.url_result('http://' + url)
809                 elif default_search != 'fixup_error':
810                     if default_search == 'auto_warning':
811                         if re.match(r'^(?:url|URL)$', url):
812                             raise ExtractorError(
813                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
814                                 expected=True)
815                         else:
816                             self._downloader.report_warning(
817                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
818                     return self.url_result('ytsearch:' + url)
819
820             if default_search in ('error', 'fixup_error'):
821                 raise ExtractorError(
822                     '%r is not a valid URL. '
823                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
824                     % (url, url), expected=True)
825             else:
826                 if ':' not in default_search:
827                     default_search += ':'
828                 return self.url_result(default_search + url)
829
830         url, smuggled_data = unsmuggle_url(url)
831         force_videoid = None
832         is_intentional = smuggled_data and smuggled_data.get('to_generic')
833         if smuggled_data and 'force_videoid' in smuggled_data:
834             force_videoid = smuggled_data['force_videoid']
835             video_id = force_videoid
836         else:
837             video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
838
839         self.to_screen('%s: Requesting header' % video_id)
840
841         head_req = HEADRequest(url)
842         head_response = self._request_webpage(
843             head_req, video_id,
844             note=False, errnote='Could not send HEAD request to %s' % url,
845             fatal=False)
846
847         if head_response is not False:
848             # Check for redirect
849             new_url = head_response.geturl()
850             if url != new_url:
851                 self.report_following_redirect(new_url)
852                 if force_videoid:
853                     new_url = smuggle_url(
854                         new_url, {'force_videoid': force_videoid})
855                 return self.url_result(new_url)
856
857         full_response = None
858         if head_response is False:
859             full_response = self._request_webpage(url, video_id)
860             head_response = full_response
861
862         # Check for direct link to a video
863         content_type = head_response.headers.get('Content-Type', '')
864         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
865         if m:
866             upload_date = unified_strdate(
867                 head_response.headers.get('Last-Modified'))
868             return {
869                 'id': video_id,
870                 'title': os.path.splitext(url_basename(url))[0],
871                 'direct': True,
872                 'formats': [{
873                     'format_id': m.group('format_id'),
874                     'url': url,
875                     'vcodec': 'none' if m.group('type') == 'audio' else None
876                 }],
877                 'upload_date': upload_date,
878             }
879
880         if not self._downloader.params.get('test', False) and not is_intentional:
881             self._downloader.report_warning('Falling back on generic information extractor.')
882
883         if not full_response:
884             full_response = self._request_webpage(url, video_id)
885
886         # Maybe it's a direct link to a video?
887         # Be careful not to download the whole thing!
888         first_bytes = full_response.read(512)
889         if not is_html(first_bytes):
890             self._downloader.report_warning(
891                 'URL could be a direct video link, returning it as such.')
892             upload_date = unified_strdate(
893                 head_response.headers.get('Last-Modified'))
894             return {
895                 'id': video_id,
896                 'title': os.path.splitext(url_basename(url))[0],
897                 'direct': True,
898                 'url': url,
899                 'upload_date': upload_date,
900             }
901
902         webpage = self._webpage_read_content(
903             full_response, url, video_id, prefix=first_bytes)
904
905         self.report_extraction(video_id)
906
907         # Is it an RSS feed?
908         try:
909             doc = parse_xml(webpage)
910             if doc.tag == 'rss':
911                 return self._extract_rss(url, video_id, doc)
912         except compat_xml_parse_error:
913             pass
914
915         # Is it a Camtasia project?
916         camtasia_res = self._extract_camtasia(url, video_id, webpage)
917         if camtasia_res is not None:
918             return camtasia_res
919
920         # Sometimes embedded video player is hidden behind percent encoding
921         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
922         # Unescaping the whole page allows to handle those cases in a generic way
923         webpage = compat_urllib_parse.unquote(webpage)
924
925         # it's tempting to parse this further, but you would
926         # have to take into account all the variations like
927         #   Video Title - Site Name
928         #   Site Name | Video Title
929         #   Video Title - Tagline | Site Name
930         # and so on and so forth; it's just not practical
931         video_title = self._html_search_regex(
932             r'(?s)<title>(.*?)</title>', webpage, 'video title',
933             default='video')
934
935         # Try to detect age limit automatically
936         age_limit = self._rta_search(webpage)
937         # And then there are the jokers who advertise that they use RTA,
938         # but actually don't.
939         AGE_LIMIT_MARKERS = [
940             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
941         ]
942         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
943             age_limit = 18
944
945         # video uploader is domain name
946         video_uploader = self._search_regex(
947             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
948
949         # Helper method
950         def _playlist_from_matches(matches, getter=None, ie=None):
951             urlrs = orderedSet(
952                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
953                 for m in matches)
954             return self.playlist_result(
955                 urlrs, playlist_id=video_id, playlist_title=video_title)
956
957         # Look for BrightCove:
958         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
959         if bc_urls:
960             self.to_screen('Brightcove video detected.')
961             entries = [{
962                 '_type': 'url',
963                 'url': smuggle_url(bc_url, {'Referer': url}),
964                 'ie_key': 'Brightcove'
965             } for bc_url in bc_urls]
966
967             return {
968                 '_type': 'playlist',
969                 'title': video_title,
970                 'id': video_id,
971                 'entries': entries,
972             }
973
974         # Look for embedded rtl.nl player
975         matches = re.findall(
976             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
977             webpage)
978         if matches:
979             return _playlist_from_matches(matches, ie='RtlNl')
980
981         # Look for embedded (iframe) Vimeo player
982         mobj = re.search(
983             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
984         if mobj:
985             player_url = unescapeHTML(mobj.group('url'))
986             surl = smuggle_url(player_url, {'Referer': url})
987             return self.url_result(surl)
988         # Look for embedded (swf embed) Vimeo player
989         mobj = re.search(
990             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
991         if mobj:
992             return self.url_result(mobj.group(1))
993
994         # Look for embedded YouTube player
995         matches = re.findall(r'''(?x)
996             (?:
997                 <iframe[^>]+?src=|
998                 data-video-url=|
999                 <embed[^>]+?src=|
1000                 embedSWF\(?:\s*|
1001                 new\s+SWFObject\(
1002             )
1003             (["\'])
1004                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1005                 (?:embed|v|p)/.+?)
1006             \1''', webpage)
1007         if matches:
1008             return _playlist_from_matches(
1009                 matches, lambda m: unescapeHTML(m[1]))
1010
1011         # Look for lazyYT YouTube embed
1012         matches = re.findall(
1013             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1014         if matches:
1015             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1016
1017         # Look for embedded Dailymotion player
1018         matches = re.findall(
1019             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1020         if matches:
1021             return _playlist_from_matches(
1022                 matches, lambda m: unescapeHTML(m[1]))
1023
1024         # Look for embedded Dailymotion playlist player (#3822)
1025         m = re.search(
1026             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1027         if m:
1028             playlists = re.findall(
1029                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1030             if playlists:
1031                 return _playlist_from_matches(
1032                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1033
1034         # Look for embedded Wistia player
1035         match = re.search(
1036             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1037         if match:
1038             embed_url = self._proto_relative_url(
1039                 unescapeHTML(match.group('url')))
1040             return {
1041                 '_type': 'url_transparent',
1042                 'url': embed_url,
1043                 'ie_key': 'Wistia',
1044                 'uploader': video_uploader,
1045                 'title': video_title,
1046                 'id': video_id,
1047             }
1048
1049         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1050         if match:
1051             return {
1052                 '_type': 'url_transparent',
1053                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1054                 'ie_key': 'Wistia',
1055                 'uploader': video_uploader,
1056                 'title': video_title,
1057                 'id': match.group('id')
1058             }
1059
1060         # Look for embedded blip.tv player
1061         mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
1062         if mobj:
1063             return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
1064         mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
1065         if mobj:
1066             return self.url_result(mobj.group(1), 'BlipTV')
1067
1068         # Look for embedded condenast player
1069         matches = re.findall(
1070             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1071             webpage)
1072         if matches:
1073             return {
1074                 '_type': 'playlist',
1075                 'entries': [{
1076                     '_type': 'url',
1077                     'ie_key': 'CondeNast',
1078                     'url': ma,
1079                 } for ma in matches],
1080                 'title': video_title,
1081                 'id': video_id,
1082             }
1083
1084         # Look for Bandcamp pages with custom domain
1085         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1086         if mobj is not None:
1087             burl = unescapeHTML(mobj.group(1))
1088             # Don't set the extractor because it can be a track url or an album
1089             return self.url_result(burl)
1090
1091         # Look for embedded Vevo player
1092         mobj = re.search(
1093             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1094         if mobj is not None:
1095             return self.url_result(mobj.group('url'))
1096
1097         # Look for embedded Viddler player
1098         mobj = re.search(
1099             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1100             webpage)
1101         if mobj is not None:
1102             return self.url_result(mobj.group('url'))
1103
1104         # Look for NYTimes player
1105         mobj = re.search(
1106             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1107             webpage)
1108         if mobj is not None:
1109             return self.url_result(mobj.group('url'))
1110
1111         # Look for Libsyn player
1112         mobj = re.search(
1113             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1114         if mobj is not None:
1115             return self.url_result(mobj.group('url'))
1116
1117         # Look for Ooyala videos
1118         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1119                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1120                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1121                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1122         if mobj is not None:
1123             return OoyalaIE._build_url_result(mobj.group('ec'))
1124
1125         # Look for multiple Ooyala embeds on SBN network websites
1126         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1127         if mobj is not None:
1128             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1129             if embeds:
1130                 return _playlist_from_matches(
1131                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1132
1133         # Look for Aparat videos
1134         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1135         if mobj is not None:
1136             return self.url_result(mobj.group(1), 'Aparat')
1137
1138         # Look for MPORA videos
1139         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1140         if mobj is not None:
1141             return self.url_result(mobj.group(1), 'Mpora')
1142
1143         # Look for embedded NovaMov-based player
1144         mobj = re.search(
1145             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1146                     (?P<url>http://(?:(?:embed|www)\.)?
1147                         (?:novamov\.com|
1148                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1149                            videoweed\.(?:es|com)|
1150                            movshare\.(?:net|sx|ag)|
1151                            divxstage\.(?:eu|net|ch|co|at|ag))
1152                         /embed\.php.+?)\1''', webpage)
1153         if mobj is not None:
1154             return self.url_result(mobj.group('url'))
1155
1156         # Look for embedded Facebook player
1157         mobj = re.search(
1158             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1159         if mobj is not None:
1160             return self.url_result(mobj.group('url'), 'Facebook')
1161
1162         # Look for embedded VK player
1163         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1164         if mobj is not None:
1165             return self.url_result(mobj.group('url'), 'VK')
1166
1167         # Look for embedded ivi player
1168         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1169         if mobj is not None:
1170             return self.url_result(mobj.group('url'), 'Ivi')
1171
1172         # Look for embedded Huffington Post player
1173         mobj = re.search(
1174             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1175         if mobj is not None:
1176             return self.url_result(mobj.group('url'), 'HuffPost')
1177
1178         # Look for embed.ly
1179         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1180         if mobj is not None:
1181             return self.url_result(mobj.group('url'))
1182         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1183         if mobj is not None:
1184             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1185
1186         # Look for funnyordie embed
1187         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1188         if matches:
1189             return _playlist_from_matches(
1190                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1191
1192         # Look for BBC iPlayer embed
1193         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1194         if matches:
1195             return _playlist_from_matches(matches, ie='BBCCoUk')
1196
1197         # Look for embedded RUTV player
1198         rutv_url = RUTVIE._extract_url(webpage)
1199         if rutv_url:
1200             return self.url_result(rutv_url, 'RUTV')
1201
1202         # Look for embedded TED player
1203         mobj = re.search(
1204             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1205         if mobj is not None:
1206             return self.url_result(mobj.group('url'), 'TED')
1207
1208         # Look for embedded Ustream videos
1209         mobj = re.search(
1210             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1211         if mobj is not None:
1212             return self.url_result(mobj.group('url'), 'Ustream')
1213
1214         # Look for embedded arte.tv player
1215         mobj = re.search(
1216             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1217             webpage)
1218         if mobj is not None:
1219             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1220
1221         # Look for embedded smotri.com player
1222         smotri_url = SmotriIE._extract_url(webpage)
1223         if smotri_url:
1224             return self.url_result(smotri_url, 'Smotri')
1225
1226         # Look for embeded soundcloud player
1227         mobj = re.search(
1228             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1229             webpage)
1230         if mobj is not None:
1231             url = unescapeHTML(mobj.group('url'))
1232             return self.url_result(url)
1233
1234         # Look for embedded vulture.com player
1235         mobj = re.search(
1236             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1237             webpage)
1238         if mobj is not None:
1239             url = unescapeHTML(mobj.group('url'))
1240             return self.url_result(url, ie='Vulture')
1241
1242         # Look for embedded mtvservices player
1243         mobj = re.search(
1244             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1245             webpage)
1246         if mobj is not None:
1247             url = unescapeHTML(mobj.group('url'))
1248             return self.url_result(url, ie='MTVServicesEmbedded')
1249
1250         # Look for embedded yahoo player
1251         mobj = re.search(
1252             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1253             webpage)
1254         if mobj is not None:
1255             return self.url_result(mobj.group('url'), 'Yahoo')
1256
1257         # Look for embedded sbs.com.au player
1258         mobj = re.search(
1259             r'''(?x)
1260             (?:
1261                 <meta\s+property="og:video"\s+content=|
1262                 <iframe[^>]+?src=
1263             )
1264             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1265             webpage)
1266         if mobj is not None:
1267             return self.url_result(mobj.group('url'), 'SBS')
1268
1269         # Look for embedded Cinchcast player
1270         mobj = re.search(
1271             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1272             webpage)
1273         if mobj is not None:
1274             return self.url_result(mobj.group('url'), 'Cinchcast')
1275
1276         mobj = re.search(
1277             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1278             webpage)
1279         if mobj is not None:
1280             return self.url_result(mobj.group('url'), 'MLB')
1281
1282         mobj = re.search(
1283             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1284             webpage)
1285         if mobj is not None:
1286             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1287
1288         mobj = re.search(
1289             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1290             webpage)
1291         if mobj is not None:
1292             return self.url_result(mobj.group('url'), 'Livestream')
1293
1294         # Look for Zapiks embed
1295         mobj = re.search(
1296             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1297         if mobj is not None:
1298             return self.url_result(mobj.group('url'), 'Zapiks')
1299
1300         # Look for Kaltura embeds
1301         mobj = re.search(
1302             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1303         if mobj is not None:
1304             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1305
1306         # Look for Eagle.Platform embeds
1307         mobj = re.search(
1308             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1309         if mobj is not None:
1310             return self.url_result(mobj.group('url'), 'EaglePlatform')
1311
1312         # Look for ClipYou (uses Eagle.Platform) embeds
1313         mobj = re.search(
1314             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1315         if mobj is not None:
1316             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1317
1318         # Look for Pladform embeds
1319         mobj = re.search(
1320             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1321         if mobj is not None:
1322             return self.url_result(mobj.group('url'), 'Pladform')
1323
1324         # Look for Playwire embeds
1325         mobj = re.search(
1326             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1327         if mobj is not None:
1328             return self.url_result(mobj.group('url'))
1329
1330         # Look for 5min embeds
1331         mobj = re.search(
1332             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1333         if mobj is not None:
1334             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1335
1336         # Look for Crooks and Liars embeds
1337         mobj = re.search(
1338             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1339         if mobj is not None:
1340             return self.url_result(mobj.group('url'))
1341
1342         # Look for NBC Sports VPlayer embeds
1343         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1344         if nbc_sports_url:
1345             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1346
1347         # Look for UDN embeds
1348         mobj = re.search(
1349             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1350         if mobj is not None:
1351             return self.url_result(
1352                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1353
1354         def check_video(vurl):
1355             if YoutubeIE.suitable(vurl):
1356                 return True
1357             vpath = compat_urlparse.urlparse(vurl).path
1358             vext = determine_ext(vpath)
1359             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1360
1361         def filter_video(urls):
1362             return list(filter(check_video, urls))
1363
1364         # Start with something easy: JW Player in SWFObject
1365         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1366         if not found:
1367             # Look for gorilla-vid style embedding
1368             found = filter_video(re.findall(r'''(?sx)
1369                 (?:
1370                     jw_plugins|
1371                     JWPlayerOptions|
1372                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1373                 )
1374                 .*?
1375                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1376         if not found:
1377             # Broaden the search a little bit
1378             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1379         if not found:
1380             # Broaden the findall a little bit: JWPlayer JS loader
1381             found = filter_video(re.findall(
1382                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1383         if not found:
1384             # Flow player
1385             found = filter_video(re.findall(r'''(?xs)
1386                 flowplayer\("[^"]+",\s*
1387                     \{[^}]+?\}\s*,
1388                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1389                         ["']?url["']?\s*:\s*["']([^"']+)["']
1390             ''', webpage))
1391         if not found:
1392             # Cinerama player
1393             found = re.findall(
1394                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1395         if not found:
1396             # Try to find twitter cards info
1397             found = filter_video(re.findall(
1398                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1399         if not found:
1400             # We look for Open Graph info:
1401             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1402             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1403             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1404             if m_video_type is not None:
1405                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1406         if not found:
1407             # HTML5 video
1408             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1409         if not found:
1410             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1411             found = re.search(
1412                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1413                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1414                 webpage)
1415             if not found:
1416                 # Look also in Refresh HTTP header
1417                 refresh_header = head_response.headers.get('Refresh')
1418                 if refresh_header:
1419                     found = re.search(REDIRECT_REGEX, refresh_header)
1420             if found:
1421                 new_url = found.group(1)
1422                 self.report_following_redirect(new_url)
1423                 return {
1424                     '_type': 'url',
1425                     'url': new_url,
1426                 }
1427         if not found:
1428             raise UnsupportedError(url)
1429
1430         entries = []
1431         for video_url in found:
1432             video_url = compat_urlparse.urljoin(url, video_url)
1433             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1434
1435             # Sometimes, jwplayer extraction will result in a YouTube URL
1436             if YoutubeIE.suitable(video_url):
1437                 entries.append(self.url_result(video_url, 'Youtube'))
1438                 continue
1439
1440             # here's a fun little line of code for you:
1441             video_id = os.path.splitext(video_id)[0]
1442
1443             entries.append({
1444                 'id': video_id,
1445                 'url': video_url,
1446                 'uploader': video_uploader,
1447                 'title': video_title,
1448                 'age_limit': age_limit,
1449             })
1450
1451         if len(entries) == 1:
1452             return entries[0]
1453         else:
1454             for num, e in enumerate(entries, start=1):
1455                 # 'url' results don't have a title
1456                 if e.get('title') is not None:
1457                     e['title'] = '%s (%d)' % (e['title'], num)
1458             return {
1459                 '_type': 'playlist',
1460                 'entries': entries,
1461             }