Merge branch 'youku' of https://github.com/PeterDing/youtube-dl into PeterDing-youku
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urllib_parse_unquote,
13     compat_urllib_request,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     parse_xml,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import BrightcoveIE
34 from .nbc import NBCSportsVPlayerIE
35 from .ooyala import OoyalaIE
36 from .rutv import RUTVIE
37 from .tvc import TVCIE
38 from .sportbox import SportBoxEmbedIE
39 from .smotri import SmotriIE
40 from .condenast import CondeNastIE
41 from .udn import UDNEmbedIE
42 from .senateisvp import SenateISVPIE
43 from .bliptv import BlipTVIE
44 from .svt import SVTIE
45 from .pornhub import PornHubIE
46
47
48 class GenericIE(InfoExtractor):
49     IE_DESC = 'Generic downloader that works on some sites'
50     _VALID_URL = r'.*'
51     IE_NAME = 'generic'
52     _TESTS = [
53         # Direct link to a video
54         {
55             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
56             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
57             'info_dict': {
58                 'id': 'trailer',
59                 'ext': 'mp4',
60                 'title': 'trailer',
61                 'upload_date': '20100513',
62             }
63         },
64         # Direct link to media delivered compressed (until Accept-Encoding is *)
65         {
66             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
67             'md5': '128c42e68b13950268b648275386fc74',
68             'info_dict': {
69                 'id': 'FictionJunction-Parallel_Hearts',
70                 'ext': 'flac',
71                 'title': 'FictionJunction-Parallel_Hearts',
72                 'upload_date': '20140522',
73             },
74             'expected_warnings': [
75                 'URL could be a direct video link, returning it as such.'
76             ]
77         },
78         # Direct download with broken HEAD
79         {
80             'url': 'http://ai-radio.org:8000/radio.opus',
81             'info_dict': {
82                 'id': 'radio',
83                 'ext': 'opus',
84                 'title': 'radio',
85             },
86             'params': {
87                 'skip_download': True,  # infinite live stream
88             },
89             'expected_warnings': [
90                 r'501.*Not Implemented'
91             ],
92         },
93         # Direct link with incorrect MIME type
94         {
95             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
96             'md5': '4ccbebe5f36706d85221f204d7eb5913',
97             'info_dict': {
98                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
99                 'id': '5_Lennart_Poettering_-_Systemd',
100                 'ext': 'webm',
101                 'title': '5_Lennart_Poettering_-_Systemd',
102                 'upload_date': '20141120',
103             },
104             'expected_warnings': [
105                 'URL could be a direct video link, returning it as such.'
106             ]
107         },
108         # RSS feed
109         {
110             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
111             'info_dict': {
112                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
113                 'title': 'Zero Punctuation',
114                 'description': 're:.*groundbreaking video review series.*'
115             },
116             'playlist_mincount': 11,
117         },
118         # RSS feed with enclosure
119         {
120             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
121             'info_dict': {
122                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
123                 'ext': 'm4v',
124                 'upload_date': '20150228',
125                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
126             }
127         },
128         # google redirect
129         {
130             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
131             'info_dict': {
132                 'id': 'cmQHVoWB5FY',
133                 'ext': 'mp4',
134                 'upload_date': '20130224',
135                 'uploader_id': 'TheVerge',
136                 'description': 're:^Chris Ziegler takes a look at the\.*',
137                 'uploader': 'The Verge',
138                 'title': 'First Firefox OS phones side-by-side',
139             },
140             'params': {
141                 'skip_download': False,
142             }
143         },
144         {
145             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
146             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
147             'info_dict': {
148                 'id': '13601338388002',
149                 'ext': 'mp4',
150                 'uploader': 'www.hodiho.fr',
151                 'title': 'R\u00e9gis plante sa Jeep',
152             }
153         },
154         # bandcamp page with custom domain
155         {
156             'add_ie': ['Bandcamp'],
157             'url': 'http://bronyrock.com/track/the-pony-mash',
158             'info_dict': {
159                 'id': '3235767654',
160                 'ext': 'mp3',
161                 'title': 'The Pony Mash',
162                 'uploader': 'M_Pallante',
163             },
164             'skip': 'There is a limit of 200 free downloads / month for the test song',
165         },
166         # embedded brightcove video
167         # it also tests brightcove videos that need to set the 'Referer' in the
168         # http requests
169         {
170             'add_ie': ['Brightcove'],
171             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
172             'info_dict': {
173                 'id': '2765128793001',
174                 'ext': 'mp4',
175                 'title': 'Le cours de bourse : l’analyse technique',
176                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
177                 'uploader': 'BFM BUSINESS',
178             },
179             'params': {
180                 'skip_download': True,
181             },
182         },
183         {
184             # https://github.com/rg3/youtube-dl/issues/2253
185             'url': 'http://bcove.me/i6nfkrc3',
186             'md5': '0ba9446db037002366bab3b3eb30c88c',
187             'info_dict': {
188                 'id': '3101154703001',
189                 'ext': 'mp4',
190                 'title': 'Still no power',
191                 'uploader': 'thestar.com',
192                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
193             },
194             'add_ie': ['Brightcove'],
195         },
196         {
197             'url': 'http://www.championat.com/video/football/v/87/87499.html',
198             'md5': 'fb973ecf6e4a78a67453647444222983',
199             'info_dict': {
200                 'id': '3414141473001',
201                 'ext': 'mp4',
202                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
203                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
204                 'uploader': 'Championat',
205             },
206         },
207         {
208             # https://github.com/rg3/youtube-dl/issues/3541
209             'add_ie': ['Brightcove'],
210             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
211             'info_dict': {
212                 'id': '3866516442001',
213                 'ext': 'mp4',
214                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
215                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
216                 'uploader': 'SBS Broadcasting',
217             },
218             'skip': 'Restricted to Netherlands',
219             'params': {
220                 'skip_download': True,  # m3u8 download
221             },
222         },
223         # ooyala video
224         {
225             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
226             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
227             'info_dict': {
228                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
229                 'ext': 'mp4',
230                 'title': '2cc213299525360.mov',  # that's what we get
231             },
232             'add_ie': ['Ooyala'],
233         },
234         # multiple ooyala embeds on SBN network websites
235         {
236             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
237             'info_dict': {
238                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
239                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
240             },
241             'playlist_mincount': 3,
242             'params': {
243                 'skip_download': True,
244             },
245             'add_ie': ['Ooyala'],
246         },
247         # embed.ly video
248         {
249             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
250             'info_dict': {
251                 'id': '9ODmcdjQcHQ',
252                 'ext': 'mp4',
253                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
254                 'upload_date': '20140225',
255                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
256                 'uploader': 'Tested',
257                 'uploader_id': 'testedcom',
258             },
259             # No need to test YoutubeIE here
260             'params': {
261                 'skip_download': True,
262             },
263         },
264         # funnyordie embed
265         {
266             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
267             'info_dict': {
268                 'id': '18e820ec3f',
269                 'ext': 'mp4',
270                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
271                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
272             },
273         },
274         # BBC iPlayer embeds
275         {
276             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
277             'info_dict': {
278                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
279             },
280             'playlist_mincount': 18,
281         },
282         # RUTV embed
283         {
284             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
285             'info_dict': {
286                 'id': '776940',
287                 'ext': 'mp4',
288                 'title': 'Охотское море стало целиком российским',
289                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
290             },
291             'params': {
292                 # m3u8 download
293                 'skip_download': True,
294             },
295         },
296         # TVC embed
297         {
298             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
299             'info_dict': {
300                 'id': '55304',
301                 'ext': 'mp4',
302                 'title': 'Дошкольное воспитание',
303             },
304         },
305         # SportBox embed
306         {
307             'url': 'http://www.vestifinance.ru/articles/25753',
308             'info_dict': {
309                 'id': '25753',
310                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
311             },
312             'playlist': [{
313                 'info_dict': {
314                     'id': '370908',
315                     'title': 'Госзаказ. День 3',
316                     'ext': 'mp4',
317                 }
318             }, {
319                 'info_dict': {
320                     'id': '370905',
321                     'title': 'Госзаказ. День 2',
322                     'ext': 'mp4',
323                 }
324             }, {
325                 'info_dict': {
326                     'id': '370902',
327                     'title': 'Госзаказ. День 1',
328                     'ext': 'mp4',
329                 }
330             }],
331             'params': {
332                 # m3u8 download
333                 'skip_download': True,
334             },
335         },
336         # Embedded TED video
337         {
338             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
339             'md5': '65fdff94098e4a607385a60c5177c638',
340             'info_dict': {
341                 'id': '1969',
342                 'ext': 'mp4',
343                 'title': 'Hidden miracles of the natural world',
344                 'uploader': 'Louie Schwartzberg',
345                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
346             }
347         },
348         # Embedded Ustream video
349         {
350             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
351             'md5': '27b99cdb639c9b12a79bca876a073417',
352             'info_dict': {
353                 'id': '45734260',
354                 'ext': 'flv',
355                 'uploader': 'AU SPA:  The NSA and Privacy',
356                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
357             }
358         },
359         # nowvideo embed hidden behind percent encoding
360         {
361             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
362             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
363             'info_dict': {
364                 'id': '06e53103ca9aa',
365                 'ext': 'flv',
366                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
367                 'description': 'No description',
368             },
369         },
370         # arte embed
371         {
372             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
373             'md5': '7653032cbb25bf6c80d80f217055fa43',
374             'info_dict': {
375                 'id': '048195-004_PLUS7-F',
376                 'ext': 'flv',
377                 'title': 'X:enius',
378                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
379                 'upload_date': '20140320',
380             },
381             'params': {
382                 'skip_download': 'Requires rtmpdump'
383             }
384         },
385         # Condé Nast embed
386         {
387             'url': 'http://www.wired.com/2014/04/honda-asimo/',
388             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
389             'info_dict': {
390                 'id': '53501be369702d3275860000',
391                 'ext': 'mp4',
392                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
393             }
394         },
395         # Dailymotion embed
396         {
397             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
398             'md5': '441aeeb82eb72c422c7f14ec533999cd',
399             'info_dict': {
400                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
401                 'ext': 'mp4',
402                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
403                 'uploader': 'Spi0n',
404             },
405             'add_ie': ['Dailymotion'],
406         },
407         # YouTube embed
408         {
409             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
410             'info_dict': {
411                 'id': 'FXRb4ykk4S0',
412                 'ext': 'mp4',
413                 'title': 'The NBL Auction 2014',
414                 'uploader': 'BADMINTON England',
415                 'uploader_id': 'BADMINTONEvents',
416                 'upload_date': '20140603',
417                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
418             },
419             'add_ie': ['Youtube'],
420             'params': {
421                 'skip_download': True,
422             }
423         },
424         # MTVServices embed
425         {
426             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
427             'md5': '35727f82f58c76d996fc188f9755b0d5',
428             'info_dict': {
429                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
430                 'ext': 'mp4',
431                 'title': 'Review',
432                 'description': 'Mario\'s life in the fast lane has never looked so good.',
433             },
434         },
435         # YouTube embed via <data-embed-url="">
436         {
437             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
438             'info_dict': {
439                 'id': '4vAffPZIT44',
440                 'ext': 'mp4',
441                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
442                 'uploader': 'Gameloft',
443                 'uploader_id': 'gameloft',
444                 'upload_date': '20140828',
445                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
446             },
447             'params': {
448                 'skip_download': True,
449             }
450         },
451         # Camtasia studio
452         {
453             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
454             'playlist': [{
455                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
456                 'info_dict': {
457                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
458                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
459                     'ext': 'flv',
460                     'duration': 2235.90,
461                 }
462             }, {
463                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
464                 'info_dict': {
465                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
466                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
467                     'ext': 'flv',
468                     'duration': 2235.93,
469                 }
470             }],
471             'info_dict': {
472                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
473             }
474         },
475         # Flowplayer
476         {
477             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
478             'md5': '9d65602bf31c6e20014319c7d07fba27',
479             'info_dict': {
480                 'id': '5123ea6d5e5a7',
481                 'ext': 'mp4',
482                 'age_limit': 18,
483                 'uploader': 'www.handjobhub.com',
484                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
485             }
486         },
487         # Multiple brightcove videos
488         # https://github.com/rg3/youtube-dl/issues/2283
489         {
490             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
491             'info_dict': {
492                 'id': 'always-never',
493                 'title': 'Always / Never - The New Yorker',
494             },
495             'playlist_count': 3,
496             'params': {
497                 'extract_flat': False,
498                 'skip_download': True,
499             }
500         },
501         # MLB embed
502         {
503             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
504             'md5': '96f09a37e44da40dd083e12d9a683327',
505             'info_dict': {
506                 'id': '33322633',
507                 'ext': 'mp4',
508                 'title': 'Ump changes call to ball',
509                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
510                 'duration': 48,
511                 'timestamp': 1401537900,
512                 'upload_date': '20140531',
513                 'thumbnail': 're:^https?://.*\.jpg$',
514             },
515         },
516         # Wistia embed
517         {
518             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
519             'md5': '8788b683c777a5cf25621eaf286d0c23',
520             'info_dict': {
521                 'id': '1cfaf6b7ea',
522                 'ext': 'mov',
523                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
524                 'duration': 643.0,
525                 'filesize': 182808282,
526                 'uploader': 'education-portal.com',
527             },
528         },
529         {
530             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
531             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
532             'info_dict': {
533                 'id': 'uxjb0lwrcz',
534                 'ext': 'mp4',
535                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
536                 'duration': 1715.0,
537                 'uploader': 'thoughtworks.wistia.com',
538             },
539         },
540         # Soundcloud embed
541         {
542             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
543             'info_dict': {
544                 'id': '174391317',
545                 'ext': 'mp3',
546                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
547                 'uploader': 'Sophos Security',
548                 'title': 'Chet Chat 171 - Oct 29, 2014',
549                 'upload_date': '20141029',
550             }
551         },
552         # Livestream embed
553         {
554             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
555             'info_dict': {
556                 'id': '67864563',
557                 'ext': 'flv',
558                 'upload_date': '20141112',
559                 'title': 'Rosetta #CometLanding webcast HL 10',
560             }
561         },
562         # LazyYT
563         {
564             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
565             'info_dict': {
566                 'id': '1986',
567                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
568             },
569             'playlist_mincount': 2,
570         },
571         # Cinchcast embed
572         {
573             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
574             'info_dict': {
575                 'id': '7141703',
576                 'ext': 'mp3',
577                 'upload_date': '20141126',
578                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
579             }
580         },
581         # Cinerama player
582         {
583             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
584             'info_dict': {
585                 'id': '730m_DandD_1901_512k',
586                 'ext': 'mp4',
587                 'uploader': 'www.abc.net.au',
588                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
589             }
590         },
591         # embedded viddler video
592         {
593             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
594             'info_dict': {
595                 'id': '4d03aad9',
596                 'ext': 'mp4',
597                 'uploader': 'deadspin',
598                 'title': 'WALL-TO-GORTAT',
599                 'timestamp': 1422285291,
600                 'upload_date': '20150126',
601             },
602             'add_ie': ['Viddler'],
603         },
604         # Libsyn embed
605         {
606             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
607             'info_dict': {
608                 'id': '3377616',
609                 'ext': 'mp3',
610                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
611                 'description': 'md5:601cb790edd05908957dae8aaa866465',
612                 'upload_date': '20150220',
613             },
614         },
615         # jwplayer YouTube
616         {
617             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
618             'info_dict': {
619                 'id': 'Mrj4DVp2zeA',
620                 'ext': 'mp4',
621                 'upload_date': '20150212',
622                 'uploader': 'The National Archives UK',
623                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
624                 'uploader_id': 'NationalArchives08',
625                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
626             },
627         },
628         # rtl.nl embed
629         {
630             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
631             'playlist_mincount': 5,
632             'info_dict': {
633                 'id': 'aanslagen-kopenhagen',
634                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
635             }
636         },
637         # Zapiks embed
638         {
639             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
640             'info_dict': {
641                 'id': '118046',
642                 'ext': 'mp4',
643                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
644             }
645         },
646         # Kaltura embed
647         {
648             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
649             'info_dict': {
650                 'id': '1_eergr3h1',
651                 'ext': 'mp4',
652                 'upload_date': '20150226',
653                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
654                 'timestamp': int,
655                 'title': 'John Carlson Postgame 2/25/15',
656             },
657         },
658         # Eagle.Platform embed (generic URL)
659         {
660             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
661             'info_dict': {
662                 'id': '227304',
663                 'ext': 'mp4',
664                 'title': 'Навальный вышел на свободу',
665                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
666                 'thumbnail': 're:^https?://.*\.jpg$',
667                 'duration': 87,
668                 'view_count': int,
669                 'age_limit': 0,
670             },
671         },
672         # ClipYou (Eagle.Platform) embed (custom URL)
673         {
674             'url': 'http://muz-tv.ru/play/7129/',
675             'info_dict': {
676                 'id': '12820',
677                 'ext': 'mp4',
678                 'title': "'O Sole Mio",
679                 'thumbnail': 're:^https?://.*\.jpg$',
680                 'duration': 216,
681                 'view_count': int,
682             },
683         },
684         # Pladform embed
685         {
686             'url': 'http://muz-tv.ru/kinozal/view/7400/',
687             'info_dict': {
688                 'id': '100183293',
689                 'ext': 'mp4',
690                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
691                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
692                 'thumbnail': 're:^https?://.*\.jpg$',
693                 'duration': 694,
694                 'age_limit': 0,
695             },
696         },
697         # Playwire embed
698         {
699             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
700             'info_dict': {
701                 'id': '3519514',
702                 'ext': 'mp4',
703                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
704                 'thumbnail': 're:^https?://.*\.png$',
705                 'duration': 45.115,
706             },
707         },
708         # 5min embed
709         {
710             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
711             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
712             'info_dict': {
713                 'id': '518726732',
714                 'ext': 'mp4',
715                 'title': 'Facebook Creates "On This Day" | Crunch Report',
716             },
717         },
718         # SVT embed
719         {
720             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
721             'info_dict': {
722                 'id': '2900353',
723                 'ext': 'flv',
724                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
725                 'duration': 27,
726                 'age_limit': 0,
727             },
728         },
729         # Crooks and Liars embed
730         {
731             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
732             'info_dict': {
733                 'id': '8RUoRhRi',
734                 'ext': 'mp4',
735                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
736                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
737                 'timestamp': 1428207000,
738                 'upload_date': '20150405',
739                 'uploader': 'Heather',
740             },
741         },
742         # Crooks and Liars external embed
743         {
744             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
745             'info_dict': {
746                 'id': 'MTE3MjUtMzQ2MzA',
747                 'ext': 'mp4',
748                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
749                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
750                 'timestamp': 1265032391,
751                 'upload_date': '20100201',
752                 'uploader': 'Heather',
753             },
754         },
755         # NBC Sports vplayer embed
756         {
757             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
758             'info_dict': {
759                 'id': 'ln7x1qSThw4k',
760                 'ext': 'flv',
761                 'title': "PFT Live: New leader in the 'new-look' defense",
762                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
763             },
764         },
765         # UDN embed
766         {
767             'url': 'http://www.udn.com/news/story/7314/822787',
768             'md5': 'fd2060e988c326991037b9aff9df21a6',
769             'info_dict': {
770                 'id': '300346',
771                 'ext': 'mp4',
772                 'title': '中一中男師變性 全校師生力挺',
773                 'thumbnail': 're:^https?://.*\.jpg$',
774             }
775         },
776         # Ooyala embed
777         {
778             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
779             'info_dict': {
780                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
781                 'ext': 'mp4',
782                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
783                 'title': 'This is what separates the Excel masters from the wannabes',
784             },
785             'params': {
786                 # m3u8 downloads
787                 'skip_download': True,
788             }
789         },
790         # Contains a SMIL manifest
791         {
792             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
793             'info_dict': {
794                 'id': 'file',
795                 'ext': 'flv',
796                 'title': '+ Football: Lottery Champions League Europe',
797                 'uploader': 'www.telewebion.com',
798             },
799             'params': {
800                 # rtmpe downloads
801                 'skip_download': True,
802             }
803         },
804         # Brightcove URL in single quotes
805         {
806             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
807             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
808             'info_dict': {
809                 'id': '4255764656001',
810                 'ext': 'mp4',
811                 'title': 'SN Presents: Russell Martin, World Citizen',
812                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
813                 'uploader': 'Rogers Sportsnet',
814             },
815         }
816     ]
817
818     def report_following_redirect(self, new_url):
819         """Report information extraction."""
820         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
821
822     def _extract_rss(self, url, video_id, doc):
823         playlist_title = doc.find('./channel/title').text
824         playlist_desc_el = doc.find('./channel/description')
825         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
826
827         entries = []
828         for it in doc.findall('./channel/item'):
829             next_url = xpath_text(it, 'link', fatal=False)
830             if not next_url:
831                 enclosure_nodes = it.findall('./enclosure')
832                 for e in enclosure_nodes:
833                     next_url = e.attrib.get('url')
834                     if next_url:
835                         break
836
837             if not next_url:
838                 continue
839
840             entries.append({
841                 '_type': 'url',
842                 'url': next_url,
843                 'title': it.find('title').text,
844             })
845
846         return {
847             '_type': 'playlist',
848             'id': url,
849             'title': playlist_title,
850             'description': playlist_desc,
851             'entries': entries,
852         }
853
854     def _extract_camtasia(self, url, video_id, webpage):
855         """ Returns None if no camtasia video can be found. """
856
857         camtasia_cfg = self._search_regex(
858             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
859             webpage, 'camtasia configuration file', default=None)
860         if camtasia_cfg is None:
861             return None
862
863         title = self._html_search_meta('DC.title', webpage, fatal=True)
864
865         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
866         camtasia_cfg = self._download_xml(
867             camtasia_url, video_id,
868             note='Downloading camtasia configuration',
869             errnote='Failed to download camtasia configuration')
870         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
871
872         entries = []
873         for n in fileset_node.getchildren():
874             url_n = n.find('./uri')
875             if url_n is None:
876                 continue
877
878             entries.append({
879                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
880                 'title': '%s - %s' % (title, n.tag),
881                 'url': compat_urlparse.urljoin(url, url_n.text),
882                 'duration': float_or_none(n.find('./duration').text),
883             })
884
885         return {
886             '_type': 'playlist',
887             'entries': entries,
888             'title': title,
889         }
890
891     def _real_extract(self, url):
892         if url.startswith('//'):
893             return {
894                 '_type': 'url',
895                 'url': self.http_scheme() + url,
896             }
897
898         parsed_url = compat_urlparse.urlparse(url)
899         if not parsed_url.scheme:
900             default_search = self._downloader.params.get('default_search')
901             if default_search is None:
902                 default_search = 'fixup_error'
903
904             if default_search in ('auto', 'auto_warning', 'fixup_error'):
905                 if '/' in url:
906                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
907                     return self.url_result('http://' + url)
908                 elif default_search != 'fixup_error':
909                     if default_search == 'auto_warning':
910                         if re.match(r'^(?:url|URL)$', url):
911                             raise ExtractorError(
912                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
913                                 expected=True)
914                         else:
915                             self._downloader.report_warning(
916                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
917                     return self.url_result('ytsearch:' + url)
918
919             if default_search in ('error', 'fixup_error'):
920                 raise ExtractorError(
921                     '%r is not a valid URL. '
922                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
923                     % (url, url), expected=True)
924             else:
925                 if ':' not in default_search:
926                     default_search += ':'
927                 return self.url_result(default_search + url)
928
929         url, smuggled_data = unsmuggle_url(url)
930         force_videoid = None
931         is_intentional = smuggled_data and smuggled_data.get('to_generic')
932         if smuggled_data and 'force_videoid' in smuggled_data:
933             force_videoid = smuggled_data['force_videoid']
934             video_id = force_videoid
935         else:
936             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
937
938         self.to_screen('%s: Requesting header' % video_id)
939
940         head_req = HEADRequest(url)
941         head_response = self._request_webpage(
942             head_req, video_id,
943             note=False, errnote='Could not send HEAD request to %s' % url,
944             fatal=False)
945
946         if head_response is not False:
947             # Check for redirect
948             new_url = head_response.geturl()
949             if url != new_url:
950                 self.report_following_redirect(new_url)
951                 if force_videoid:
952                     new_url = smuggle_url(
953                         new_url, {'force_videoid': force_videoid})
954                 return self.url_result(new_url)
955
956         full_response = None
957         if head_response is False:
958             request = compat_urllib_request.Request(url)
959             request.add_header('Accept-Encoding', '*')
960             full_response = self._request_webpage(request, video_id)
961             head_response = full_response
962
963         # Check for direct link to a video
964         content_type = head_response.headers.get('Content-Type', '')
965         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
966         if m:
967             upload_date = unified_strdate(
968                 head_response.headers.get('Last-Modified'))
969             return {
970                 'id': video_id,
971                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
972                 'direct': True,
973                 'formats': [{
974                     'format_id': m.group('format_id'),
975                     'url': url,
976                     'vcodec': 'none' if m.group('type') == 'audio' else None
977                 }],
978                 'upload_date': upload_date,
979             }
980
981         if not self._downloader.params.get('test', False) and not is_intentional:
982             self._downloader.report_warning('Falling back on generic information extractor.')
983
984         if not full_response:
985             request = compat_urllib_request.Request(url)
986             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
987             # making it impossible to download only chunk of the file (yet we need only 512kB to
988             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
989             # that will always result in downloading the whole file that is not desirable.
990             # Therefore for extraction pass we have to override Accept-Encoding to any in order
991             # to accept raw bytes and being able to download only a chunk.
992             # It may probably better to solve this by checking Content-Type for application/octet-stream
993             # after HEAD request finishes, but not sure if we can rely on this.
994             request.add_header('Accept-Encoding', '*')
995             full_response = self._request_webpage(request, video_id)
996
997         # Maybe it's a direct link to a video?
998         # Be careful not to download the whole thing!
999         first_bytes = full_response.read(512)
1000         if not is_html(first_bytes):
1001             self._downloader.report_warning(
1002                 'URL could be a direct video link, returning it as such.')
1003             upload_date = unified_strdate(
1004                 head_response.headers.get('Last-Modified'))
1005             return {
1006                 'id': video_id,
1007                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1008                 'direct': True,
1009                 'url': url,
1010                 'upload_date': upload_date,
1011             }
1012
1013         webpage = self._webpage_read_content(
1014             full_response, url, video_id, prefix=first_bytes)
1015
1016         self.report_extraction(video_id)
1017
1018         # Is it an RSS feed?
1019         try:
1020             doc = parse_xml(webpage)
1021             if doc.tag == 'rss':
1022                 return self._extract_rss(url, video_id, doc)
1023         except compat_xml_parse_error:
1024             pass
1025
1026         # Is it a Camtasia project?
1027         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1028         if camtasia_res is not None:
1029             return camtasia_res
1030
1031         # Sometimes embedded video player is hidden behind percent encoding
1032         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1033         # Unescaping the whole page allows to handle those cases in a generic way
1034         webpage = compat_urllib_parse.unquote(webpage)
1035
1036         # it's tempting to parse this further, but you would
1037         # have to take into account all the variations like
1038         #   Video Title - Site Name
1039         #   Site Name | Video Title
1040         #   Video Title - Tagline | Site Name
1041         # and so on and so forth; it's just not practical
1042         video_title = self._html_search_regex(
1043             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1044             default='video')
1045
1046         # Try to detect age limit automatically
1047         age_limit = self._rta_search(webpage)
1048         # And then there are the jokers who advertise that they use RTA,
1049         # but actually don't.
1050         AGE_LIMIT_MARKERS = [
1051             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1052         ]
1053         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1054             age_limit = 18
1055
1056         # video uploader is domain name
1057         video_uploader = self._search_regex(
1058             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1059
1060         # Helper method
1061         def _playlist_from_matches(matches, getter=None, ie=None):
1062             urlrs = orderedSet(
1063                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1064                 for m in matches)
1065             return self.playlist_result(
1066                 urlrs, playlist_id=video_id, playlist_title=video_title)
1067
1068         # Look for BrightCove:
1069         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1070         if bc_urls:
1071             self.to_screen('Brightcove video detected.')
1072             entries = [{
1073                 '_type': 'url',
1074                 'url': smuggle_url(bc_url, {'Referer': url}),
1075                 'ie_key': 'Brightcove'
1076             } for bc_url in bc_urls]
1077
1078             return {
1079                 '_type': 'playlist',
1080                 'title': video_title,
1081                 'id': video_id,
1082                 'entries': entries,
1083             }
1084
1085         # Look for embedded rtl.nl player
1086         matches = re.findall(
1087             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1088             webpage)
1089         if matches:
1090             return _playlist_from_matches(matches, ie='RtlNl')
1091
1092         # Look for embedded (iframe) Vimeo player
1093         mobj = re.search(
1094             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1095         if mobj:
1096             player_url = unescapeHTML(mobj.group('url'))
1097             surl = smuggle_url(player_url, {'Referer': url})
1098             return self.url_result(surl)
1099         # Look for embedded (swf embed) Vimeo player
1100         mobj = re.search(
1101             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1102         if mobj:
1103             return self.url_result(mobj.group(1))
1104
1105         # Look for embedded YouTube player
1106         matches = re.findall(r'''(?x)
1107             (?:
1108                 <iframe[^>]+?src=|
1109                 data-video-url=|
1110                 <embed[^>]+?src=|
1111                 embedSWF\(?:\s*|
1112                 new\s+SWFObject\(
1113             )
1114             (["\'])
1115                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1116                 (?:embed|v|p)/.+?)
1117             \1''', webpage)
1118         if matches:
1119             return _playlist_from_matches(
1120                 matches, lambda m: unescapeHTML(m[1]))
1121
1122         # Look for lazyYT YouTube embed
1123         matches = re.findall(
1124             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1125         if matches:
1126             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1127
1128         # Look for embedded Dailymotion player
1129         matches = re.findall(
1130             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1131         if matches:
1132             return _playlist_from_matches(
1133                 matches, lambda m: unescapeHTML(m[1]))
1134
1135         # Look for embedded Dailymotion playlist player (#3822)
1136         m = re.search(
1137             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1138         if m:
1139             playlists = re.findall(
1140                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1141             if playlists:
1142                 return _playlist_from_matches(
1143                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1144
1145         # Look for embedded Wistia player
1146         match = re.search(
1147             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1148         if match:
1149             embed_url = self._proto_relative_url(
1150                 unescapeHTML(match.group('url')))
1151             return {
1152                 '_type': 'url_transparent',
1153                 'url': embed_url,
1154                 'ie_key': 'Wistia',
1155                 'uploader': video_uploader,
1156                 'title': video_title,
1157                 'id': video_id,
1158             }
1159
1160         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1161         if match:
1162             return {
1163                 '_type': 'url_transparent',
1164                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1165                 'ie_key': 'Wistia',
1166                 'uploader': video_uploader,
1167                 'title': video_title,
1168                 'id': match.group('id')
1169             }
1170
1171         # Look for embedded blip.tv player
1172         bliptv_url = BlipTVIE._extract_url(webpage)
1173         if bliptv_url:
1174             return self.url_result(bliptv_url, 'BlipTV')
1175
1176         # Look for SVT player
1177         svt_url = SVTIE._extract_url(webpage)
1178         if svt_url:
1179             return self.url_result(svt_url, 'SVT')
1180
1181         # Look for embedded condenast player
1182         matches = re.findall(
1183             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1184             webpage)
1185         if matches:
1186             return {
1187                 '_type': 'playlist',
1188                 'entries': [{
1189                     '_type': 'url',
1190                     'ie_key': 'CondeNast',
1191                     'url': ma,
1192                 } for ma in matches],
1193                 'title': video_title,
1194                 'id': video_id,
1195             }
1196
1197         # Look for Bandcamp pages with custom domain
1198         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1199         if mobj is not None:
1200             burl = unescapeHTML(mobj.group(1))
1201             # Don't set the extractor because it can be a track url or an album
1202             return self.url_result(burl)
1203
1204         # Look for embedded Vevo player
1205         mobj = re.search(
1206             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1207         if mobj is not None:
1208             return self.url_result(mobj.group('url'))
1209
1210         # Look for embedded Viddler player
1211         mobj = re.search(
1212             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1213             webpage)
1214         if mobj is not None:
1215             return self.url_result(mobj.group('url'))
1216
1217         # Look for NYTimes player
1218         mobj = re.search(
1219             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1220             webpage)
1221         if mobj is not None:
1222             return self.url_result(mobj.group('url'))
1223
1224         # Look for Libsyn player
1225         mobj = re.search(
1226             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1227         if mobj is not None:
1228             return self.url_result(mobj.group('url'))
1229
1230         # Look for Ooyala videos
1231         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1232                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1233                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1234                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1235         if mobj is not None:
1236             return OoyalaIE._build_url_result(mobj.group('ec'))
1237
1238         # Look for multiple Ooyala embeds on SBN network websites
1239         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1240         if mobj is not None:
1241             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1242             if embeds:
1243                 return _playlist_from_matches(
1244                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1245
1246         # Look for Aparat videos
1247         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1248         if mobj is not None:
1249             return self.url_result(mobj.group(1), 'Aparat')
1250
1251         # Look for MPORA videos
1252         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1253         if mobj is not None:
1254             return self.url_result(mobj.group(1), 'Mpora')
1255
1256         # Look for embedded NovaMov-based player
1257         mobj = re.search(
1258             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1259                     (?P<url>http://(?:(?:embed|www)\.)?
1260                         (?:novamov\.com|
1261                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1262                            videoweed\.(?:es|com)|
1263                            movshare\.(?:net|sx|ag)|
1264                            divxstage\.(?:eu|net|ch|co|at|ag))
1265                         /embed\.php.+?)\1''', webpage)
1266         if mobj is not None:
1267             return self.url_result(mobj.group('url'))
1268
1269         # Look for embedded Facebook player
1270         mobj = re.search(
1271             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1272         if mobj is not None:
1273             return self.url_result(mobj.group('url'), 'Facebook')
1274
1275         # Look for embedded VK player
1276         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1277         if mobj is not None:
1278             return self.url_result(mobj.group('url'), 'VK')
1279
1280         # Look for embedded ivi player
1281         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1282         if mobj is not None:
1283             return self.url_result(mobj.group('url'), 'Ivi')
1284
1285         # Look for embedded Huffington Post player
1286         mobj = re.search(
1287             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1288         if mobj is not None:
1289             return self.url_result(mobj.group('url'), 'HuffPost')
1290
1291         # Look for embed.ly
1292         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1293         if mobj is not None:
1294             return self.url_result(mobj.group('url'))
1295         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1296         if mobj is not None:
1297             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1298
1299         # Look for funnyordie embed
1300         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1301         if matches:
1302             return _playlist_from_matches(
1303                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1304
1305         # Look for BBC iPlayer embed
1306         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1307         if matches:
1308             return _playlist_from_matches(matches, ie='BBCCoUk')
1309
1310         # Look for embedded RUTV player
1311         rutv_url = RUTVIE._extract_url(webpage)
1312         if rutv_url:
1313             return self.url_result(rutv_url, 'RUTV')
1314
1315         # Look for embedded TVC player
1316         tvc_url = TVCIE._extract_url(webpage)
1317         if tvc_url:
1318             return self.url_result(tvc_url, 'TVC')
1319
1320         # Look for embedded SportBox player
1321         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1322         if sportbox_urls:
1323             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1324
1325         # Look for embedded PornHub player
1326         pornhub_url = PornHubIE._extract_url(webpage)
1327         if pornhub_url:
1328             return self.url_result(pornhub_url, 'PornHub')
1329
1330         # Look for embedded Tvigle player
1331         mobj = re.search(
1332             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1333         if mobj is not None:
1334             return self.url_result(mobj.group('url'), 'Tvigle')
1335
1336         # Look for embedded TED player
1337         mobj = re.search(
1338             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1339         if mobj is not None:
1340             return self.url_result(mobj.group('url'), 'TED')
1341
1342         # Look for embedded Ustream videos
1343         mobj = re.search(
1344             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1345         if mobj is not None:
1346             return self.url_result(mobj.group('url'), 'Ustream')
1347
1348         # Look for embedded arte.tv player
1349         mobj = re.search(
1350             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1351             webpage)
1352         if mobj is not None:
1353             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1354
1355         # Look for embedded smotri.com player
1356         smotri_url = SmotriIE._extract_url(webpage)
1357         if smotri_url:
1358             return self.url_result(smotri_url, 'Smotri')
1359
1360         # Look for embeded soundcloud player
1361         mobj = re.search(
1362             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1363             webpage)
1364         if mobj is not None:
1365             url = unescapeHTML(mobj.group('url'))
1366             return self.url_result(url)
1367
1368         # Look for embedded vulture.com player
1369         mobj = re.search(
1370             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1371             webpage)
1372         if mobj is not None:
1373             url = unescapeHTML(mobj.group('url'))
1374             return self.url_result(url, ie='Vulture')
1375
1376         # Look for embedded mtvservices player
1377         mobj = re.search(
1378             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1379             webpage)
1380         if mobj is not None:
1381             url = unescapeHTML(mobj.group('url'))
1382             return self.url_result(url, ie='MTVServicesEmbedded')
1383
1384         # Look for embedded yahoo player
1385         mobj = re.search(
1386             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1387             webpage)
1388         if mobj is not None:
1389             return self.url_result(mobj.group('url'), 'Yahoo')
1390
1391         # Look for embedded sbs.com.au player
1392         mobj = re.search(
1393             r'''(?x)
1394             (?:
1395                 <meta\s+property="og:video"\s+content=|
1396                 <iframe[^>]+?src=
1397             )
1398             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1399             webpage)
1400         if mobj is not None:
1401             return self.url_result(mobj.group('url'), 'SBS')
1402
1403         # Look for embedded Cinchcast player
1404         mobj = re.search(
1405             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1406             webpage)
1407         if mobj is not None:
1408             return self.url_result(mobj.group('url'), 'Cinchcast')
1409
1410         mobj = re.search(
1411             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1412             webpage)
1413         if not mobj:
1414             mobj = re.search(
1415                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1416                 webpage)
1417         if mobj is not None:
1418             return self.url_result(mobj.group('url'), 'MLB')
1419
1420         mobj = re.search(
1421             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1422             webpage)
1423         if mobj is not None:
1424             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1425
1426         mobj = re.search(
1427             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1428             webpage)
1429         if mobj is not None:
1430             return self.url_result(mobj.group('url'), 'Livestream')
1431
1432         # Look for Zapiks embed
1433         mobj = re.search(
1434             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1435         if mobj is not None:
1436             return self.url_result(mobj.group('url'), 'Zapiks')
1437
1438         # Look for Kaltura embeds
1439         mobj = re.search(
1440             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1441         if mobj is not None:
1442             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1443
1444         # Look for Eagle.Platform embeds
1445         mobj = re.search(
1446             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1447         if mobj is not None:
1448             return self.url_result(mobj.group('url'), 'EaglePlatform')
1449
1450         # Look for ClipYou (uses Eagle.Platform) embeds
1451         mobj = re.search(
1452             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1453         if mobj is not None:
1454             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1455
1456         # Look for Pladform embeds
1457         mobj = re.search(
1458             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1459         if mobj is not None:
1460             return self.url_result(mobj.group('url'), 'Pladform')
1461
1462         # Look for Playwire embeds
1463         mobj = re.search(
1464             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1465         if mobj is not None:
1466             return self.url_result(mobj.group('url'))
1467
1468         # Look for 5min embeds
1469         mobj = re.search(
1470             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1471         if mobj is not None:
1472             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1473
1474         # Look for Crooks and Liars embeds
1475         mobj = re.search(
1476             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1477         if mobj is not None:
1478             return self.url_result(mobj.group('url'))
1479
1480         # Look for NBC Sports VPlayer embeds
1481         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1482         if nbc_sports_url:
1483             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1484
1485         # Look for UDN embeds
1486         mobj = re.search(
1487             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1488         if mobj is not None:
1489             return self.url_result(
1490                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1491
1492         # Look for Senate ISVP iframe
1493         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1494         if senate_isvp_url:
1495             return self.url_result(senate_isvp_url, 'SenateISVP')
1496
1497         def check_video(vurl):
1498             if YoutubeIE.suitable(vurl):
1499                 return True
1500             vpath = compat_urlparse.urlparse(vurl).path
1501             vext = determine_ext(vpath)
1502             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1503
1504         def filter_video(urls):
1505             return list(filter(check_video, urls))
1506
1507         # Start with something easy: JW Player in SWFObject
1508         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1509         if not found:
1510             # Look for gorilla-vid style embedding
1511             found = filter_video(re.findall(r'''(?sx)
1512                 (?:
1513                     jw_plugins|
1514                     JWPlayerOptions|
1515                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1516                 )
1517                 .*?
1518                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1519         if not found:
1520             # Broaden the search a little bit
1521             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1522         if not found:
1523             # Broaden the findall a little bit: JWPlayer JS loader
1524             found = filter_video(re.findall(
1525                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1526         if not found:
1527             # Flow player
1528             found = filter_video(re.findall(r'''(?xs)
1529                 flowplayer\("[^"]+",\s*
1530                     \{[^}]+?\}\s*,
1531                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1532                         ["']?url["']?\s*:\s*["']([^"']+)["']
1533             ''', webpage))
1534         if not found:
1535             # Cinerama player
1536             found = re.findall(
1537                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1538         if not found:
1539             # Try to find twitter cards info
1540             found = filter_video(re.findall(
1541                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1542         if not found:
1543             # We look for Open Graph info:
            # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
1545             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1546             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1547             if m_video_type is not None:
1548                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1549         if not found:
1550             # HTML5 video
1551             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1552         if not found:
1553             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1554             found = re.search(
1555                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1556                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1557                 webpage)
1558             if not found:
1559                 # Look also in Refresh HTTP header
1560                 refresh_header = head_response.headers.get('Refresh')
1561                 if refresh_header:
1562                     found = re.search(REDIRECT_REGEX, refresh_header)
1563             if found:
1564                 new_url = compat_urlparse.urljoin(url, found.group(1))
1565                 self.report_following_redirect(new_url)
1566                 return {
1567                     '_type': 'url',
1568                     'url': new_url,
1569                 }
1570         if not found:
1571             raise UnsupportedError(url)
1572
1573         entries = []
1574         for video_url in found:
1575             video_url = compat_urlparse.urljoin(url, video_url)
1576             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1577
1578             # Sometimes, jwplayer extraction will result in a YouTube URL
1579             if YoutubeIE.suitable(video_url):
1580                 entries.append(self.url_result(video_url, 'Youtube'))
1581                 continue
1582
1583             # here's a fun little line of code for you:
1584             video_id = os.path.splitext(video_id)[0]
1585
1586             if determine_ext(video_url) == 'smil':
1587                 entries.append({
1588                     'id': video_id,
1589                     'formats': self._extract_smil_formats(video_url, video_id),
1590                     'uploader': video_uploader,
1591                     'title': video_title,
1592                     'age_limit': age_limit,
1593                 })
1594             else:
1595                 entries.append({
1596                     'id': video_id,
1597                     'url': video_url,
1598                     'uploader': video_uploader,
1599                     'title': video_title,
1600                     'age_limit': age_limit,
1601                 })
1602
1603         if len(entries) == 1:
1604             return entries[0]
1605         else:
1606             for num, e in enumerate(entries, start=1):
1607                 # 'url' results don't have a title
1608                 if e.get('title') is not None:
1609                     e['title'] = '%s (%d)' % (e['title'], num)
1610             return {
1611                 '_type': 'playlist',
1612                 'entries': entries,
1613             }