[dailymotion/generic] Add DailymotionCloudIE
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urllib_parse_unquote,
13     compat_urllib_request,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     parse_xml,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import BrightcoveIE
34 from .nbc import NBCSportsVPlayerIE
35 from .ooyala import OoyalaIE
36 from .rutv import RUTVIE
37 from .tvc import TVCIE
38 from .sportbox import SportBoxEmbedIE
39 from .smotri import SmotriIE
40 from .condenast import CondeNastIE
41 from .udn import UDNEmbedIE
42 from .senateisvp import SenateISVPIE
43 from .bliptv import BlipTVIE
44 from .svt import SVTIE
45 from .pornhub import PornHubIE
46 from .vimeo import VimeoIE
47 from .dailymotion import DailymotionCloudIE
48
49
50 class GenericIE(InfoExtractor):
51     IE_DESC = 'Generic downloader that works on some sites'
52     _VALID_URL = r'.*'
53     IE_NAME = 'generic'
54     _TESTS = [
55         # Direct link to a video
56         {
57             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
58             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
59             'info_dict': {
60                 'id': 'trailer',
61                 'ext': 'mp4',
62                 'title': 'trailer',
63                 'upload_date': '20100513',
64             }
65         },
66         # Direct link to media delivered compressed (until Accept-Encoding is *)
67         {
68             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
69             'md5': '128c42e68b13950268b648275386fc74',
70             'info_dict': {
71                 'id': 'FictionJunction-Parallel_Hearts',
72                 'ext': 'flac',
73                 'title': 'FictionJunction-Parallel_Hearts',
74                 'upload_date': '20140522',
75             },
76             'expected_warnings': [
77                 'URL could be a direct video link, returning it as such.'
78             ]
79         },
80         # Direct download with broken HEAD
81         {
82             'url': 'http://ai-radio.org:8000/radio.opus',
83             'info_dict': {
84                 'id': 'radio',
85                 'ext': 'opus',
86                 'title': 'radio',
87             },
88             'params': {
89                 'skip_download': True,  # infinite live stream
90             },
91             'expected_warnings': [
92                 r'501.*Not Implemented'
93             ],
94         },
95         # Direct link with incorrect MIME type
96         {
97             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
98             'md5': '4ccbebe5f36706d85221f204d7eb5913',
99             'info_dict': {
100                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
101                 'id': '5_Lennart_Poettering_-_Systemd',
102                 'ext': 'webm',
103                 'title': '5_Lennart_Poettering_-_Systemd',
104                 'upload_date': '20141120',
105             },
106             'expected_warnings': [
107                 'URL could be a direct video link, returning it as such.'
108             ]
109         },
110         # RSS feed
111         {
112             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
113             'info_dict': {
114                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
115                 'title': 'Zero Punctuation',
116                 'description': 're:.*groundbreaking video review series.*'
117             },
118             'playlist_mincount': 11,
119         },
120         # RSS feed with enclosure
121         {
122             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
123             'info_dict': {
124                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
125                 'ext': 'm4v',
126                 'upload_date': '20150228',
127                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
128             }
129         },
130         # google redirect
131         {
132             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
133             'info_dict': {
134                 'id': 'cmQHVoWB5FY',
135                 'ext': 'mp4',
136                 'upload_date': '20130224',
137                 'uploader_id': 'TheVerge',
138                 'description': 're:^Chris Ziegler takes a look at the\.*',
139                 'uploader': 'The Verge',
140                 'title': 'First Firefox OS phones side-by-side',
141             },
142             'params': {
143                 'skip_download': False,
144             }
145         },
146         {
147             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
148             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
149             'info_dict': {
150                 'id': '13601338388002',
151                 'ext': 'mp4',
152                 'uploader': 'www.hodiho.fr',
153                 'title': 'R\u00e9gis plante sa Jeep',
154             }
155         },
156         # bandcamp page with custom domain
157         {
158             'add_ie': ['Bandcamp'],
159             'url': 'http://bronyrock.com/track/the-pony-mash',
160             'info_dict': {
161                 'id': '3235767654',
162                 'ext': 'mp3',
163                 'title': 'The Pony Mash',
164                 'uploader': 'M_Pallante',
165             },
166             'skip': 'There is a limit of 200 free downloads / month for the test song',
167         },
168         # embedded brightcove video
169         # it also tests brightcove videos that need to set the 'Referer' in the
170         # http requests
171         {
172             'add_ie': ['Brightcove'],
173             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
174             'info_dict': {
175                 'id': '2765128793001',
176                 'ext': 'mp4',
177                 'title': 'Le cours de bourse : l’analyse technique',
178                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
179                 'uploader': 'BFM BUSINESS',
180             },
181             'params': {
182                 'skip_download': True,
183             },
184         },
185         {
186             # https://github.com/rg3/youtube-dl/issues/2253
187             'url': 'http://bcove.me/i6nfkrc3',
188             'md5': '0ba9446db037002366bab3b3eb30c88c',
189             'info_dict': {
190                 'id': '3101154703001',
191                 'ext': 'mp4',
192                 'title': 'Still no power',
193                 'uploader': 'thestar.com',
194                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
195             },
196             'add_ie': ['Brightcove'],
197         },
198         {
199             'url': 'http://www.championat.com/video/football/v/87/87499.html',
200             'md5': 'fb973ecf6e4a78a67453647444222983',
201             'info_dict': {
202                 'id': '3414141473001',
203                 'ext': 'mp4',
204                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
205                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
206                 'uploader': 'Championat',
207             },
208         },
209         {
210             # https://github.com/rg3/youtube-dl/issues/3541
211             'add_ie': ['Brightcove'],
212             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
213             'info_dict': {
214                 'id': '3866516442001',
215                 'ext': 'mp4',
216                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
217                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
218                 'uploader': 'SBS Broadcasting',
219             },
220             'skip': 'Restricted to Netherlands',
221             'params': {
222                 'skip_download': True,  # m3u8 download
223             },
224         },
225         # ooyala video
226         {
227             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
228             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
229             'info_dict': {
230                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
231                 'ext': 'mp4',
232                 'title': '2cc213299525360.mov',  # that's what we get
233             },
234             'add_ie': ['Ooyala'],
235         },
236         # multiple ooyala embeds on SBN network websites
237         {
238             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
239             'info_dict': {
240                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
241                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
242             },
243             'playlist_mincount': 3,
244             'params': {
245                 'skip_download': True,
246             },
247             'add_ie': ['Ooyala'],
248         },
249         # embed.ly video
250         {
251             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
252             'info_dict': {
253                 'id': '9ODmcdjQcHQ',
254                 'ext': 'mp4',
255                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
256                 'upload_date': '20140225',
257                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
258                 'uploader': 'Tested',
259                 'uploader_id': 'testedcom',
260             },
261             # No need to test YoutubeIE here
262             'params': {
263                 'skip_download': True,
264             },
265         },
266         # funnyordie embed
267         {
268             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
269             'info_dict': {
270                 'id': '18e820ec3f',
271                 'ext': 'mp4',
272                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
273                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
274             },
275         },
276         # BBC iPlayer embeds
277         {
278             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
279             'info_dict': {
280                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
281             },
282             'playlist_mincount': 18,
283         },
284         # RUTV embed
285         {
286             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
287             'info_dict': {
288                 'id': '776940',
289                 'ext': 'mp4',
290                 'title': 'Охотское море стало целиком российским',
291                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
292             },
293             'params': {
294                 # m3u8 download
295                 'skip_download': True,
296             },
297         },
298         # TVC embed
299         {
300             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
301             'info_dict': {
302                 'id': '55304',
303                 'ext': 'mp4',
304                 'title': 'Дошкольное воспитание',
305             },
306         },
307         # SportBox embed
308         {
309             'url': 'http://www.vestifinance.ru/articles/25753',
310             'info_dict': {
311                 'id': '25753',
312                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
313             },
314             'playlist': [{
315                 'info_dict': {
316                     'id': '370908',
317                     'title': 'Госзаказ. День 3',
318                     'ext': 'mp4',
319                 }
320             }, {
321                 'info_dict': {
322                     'id': '370905',
323                     'title': 'Госзаказ. День 2',
324                     'ext': 'mp4',
325                 }
326             }, {
327                 'info_dict': {
328                     'id': '370902',
329                     'title': 'Госзаказ. День 1',
330                     'ext': 'mp4',
331                 }
332             }],
333             'params': {
334                 # m3u8 download
335                 'skip_download': True,
336             },
337         },
338         # Embedded TED video
339         {
340             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
341             'md5': '65fdff94098e4a607385a60c5177c638',
342             'info_dict': {
343                 'id': '1969',
344                 'ext': 'mp4',
345                 'title': 'Hidden miracles of the natural world',
346                 'uploader': 'Louie Schwartzberg',
347                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
348             }
349         },
350         # Embedded Ustream video
351         {
352             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
353             'md5': '27b99cdb639c9b12a79bca876a073417',
354             'info_dict': {
355                 'id': '45734260',
356                 'ext': 'flv',
357                 'uploader': 'AU SPA:  The NSA and Privacy',
358                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
359             }
360         },
361         # nowvideo embed hidden behind percent encoding
362         {
363             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
364             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
365             'info_dict': {
366                 'id': '06e53103ca9aa',
367                 'ext': 'flv',
368                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
369                 'description': 'No description',
370             },
371         },
372         # arte embed
373         {
374             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
375             'md5': '7653032cbb25bf6c80d80f217055fa43',
376             'info_dict': {
377                 'id': '048195-004_PLUS7-F',
378                 'ext': 'flv',
379                 'title': 'X:enius',
380                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
381                 'upload_date': '20140320',
382             },
383             'params': {
384                 'skip_download': 'Requires rtmpdump'
385             }
386         },
387         # Condé Nast embed
388         {
389             'url': 'http://www.wired.com/2014/04/honda-asimo/',
390             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
391             'info_dict': {
392                 'id': '53501be369702d3275860000',
393                 'ext': 'mp4',
394                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
395             }
396         },
397         # Dailymotion embed
398         {
399             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
400             'md5': '441aeeb82eb72c422c7f14ec533999cd',
401             'info_dict': {
402                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
403                 'ext': 'mp4',
404                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
405                 'uploader': 'Spi0n',
406             },
407             'add_ie': ['Dailymotion'],
408         },
409         # YouTube embed
410         {
411             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
412             'info_dict': {
413                 'id': 'FXRb4ykk4S0',
414                 'ext': 'mp4',
415                 'title': 'The NBL Auction 2014',
416                 'uploader': 'BADMINTON England',
417                 'uploader_id': 'BADMINTONEvents',
418                 'upload_date': '20140603',
419                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
420             },
421             'add_ie': ['Youtube'],
422             'params': {
423                 'skip_download': True,
424             }
425         },
426         # MTVServices embed
427         {
428             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
429             'md5': '35727f82f58c76d996fc188f9755b0d5',
430             'info_dict': {
431                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
432                 'ext': 'mp4',
433                 'title': 'Review',
434                 'description': 'Mario\'s life in the fast lane has never looked so good.',
435             },
436         },
437         # YouTube embed via <data-embed-url="">
438         {
439             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
440             'info_dict': {
441                 'id': '4vAffPZIT44',
442                 'ext': 'mp4',
443                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
444                 'uploader': 'Gameloft',
445                 'uploader_id': 'gameloft',
446                 'upload_date': '20140828',
447                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
448             },
449             'params': {
450                 'skip_download': True,
451             }
452         },
453         # Camtasia studio
454         {
455             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
456             'playlist': [{
457                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
458                 'info_dict': {
459                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
460                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
461                     'ext': 'flv',
462                     'duration': 2235.90,
463                 }
464             }, {
465                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
466                 'info_dict': {
467                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
468                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
469                     'ext': 'flv',
470                     'duration': 2235.93,
471                 }
472             }],
473             'info_dict': {
474                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
475             }
476         },
477         # Flowplayer
478         {
479             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
480             'md5': '9d65602bf31c6e20014319c7d07fba27',
481             'info_dict': {
482                 'id': '5123ea6d5e5a7',
483                 'ext': 'mp4',
484                 'age_limit': 18,
485                 'uploader': 'www.handjobhub.com',
486                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
487             }
488         },
489         # Multiple brightcove videos
490         # https://github.com/rg3/youtube-dl/issues/2283
491         {
492             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
493             'info_dict': {
494                 'id': 'always-never',
495                 'title': 'Always / Never - The New Yorker',
496             },
497             'playlist_count': 3,
498             'params': {
499                 'extract_flat': False,
500                 'skip_download': True,
501             }
502         },
503         # MLB embed
504         {
505             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
506             'md5': '96f09a37e44da40dd083e12d9a683327',
507             'info_dict': {
508                 'id': '33322633',
509                 'ext': 'mp4',
510                 'title': 'Ump changes call to ball',
511                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
512                 'duration': 48,
513                 'timestamp': 1401537900,
514                 'upload_date': '20140531',
515                 'thumbnail': 're:^https?://.*\.jpg$',
516             },
517         },
518         # Wistia embed
519         {
520             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
521             'md5': '8788b683c777a5cf25621eaf286d0c23',
522             'info_dict': {
523                 'id': '1cfaf6b7ea',
524                 'ext': 'mov',
525                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
526                 'duration': 643.0,
527                 'filesize': 182808282,
528                 'uploader': 'education-portal.com',
529             },
530         },
531         {
532             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
533             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
534             'info_dict': {
535                 'id': 'uxjb0lwrcz',
536                 'ext': 'mp4',
537                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
538                 'duration': 1715.0,
539                 'uploader': 'thoughtworks.wistia.com',
540             },
541         },
542         # Soundcloud embed
543         {
544             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
545             'info_dict': {
546                 'id': '174391317',
547                 'ext': 'mp3',
548                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
549                 'uploader': 'Sophos Security',
550                 'title': 'Chet Chat 171 - Oct 29, 2014',
551                 'upload_date': '20141029',
552             }
553         },
554         # Livestream embed
555         {
556             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
557             'info_dict': {
558                 'id': '67864563',
559                 'ext': 'flv',
560                 'upload_date': '20141112',
561                 'title': 'Rosetta #CometLanding webcast HL 10',
562             }
563         },
564         # LazyYT
565         {
566             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
567             'info_dict': {
568                 'id': '1986',
569                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
570             },
571             'playlist_mincount': 2,
572         },
573         # Cinchcast embed
574         {
575             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
576             'info_dict': {
577                 'id': '7141703',
578                 'ext': 'mp3',
579                 'upload_date': '20141126',
580                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
581             }
582         },
583         # Cinerama player
584         {
585             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
586             'info_dict': {
587                 'id': '730m_DandD_1901_512k',
588                 'ext': 'mp4',
589                 'uploader': 'www.abc.net.au',
590                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
591             }
592         },
593         # embedded viddler video
594         {
595             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
596             'info_dict': {
597                 'id': '4d03aad9',
598                 'ext': 'mp4',
599                 'uploader': 'deadspin',
600                 'title': 'WALL-TO-GORTAT',
601                 'timestamp': 1422285291,
602                 'upload_date': '20150126',
603             },
604             'add_ie': ['Viddler'],
605         },
606         # Libsyn embed
607         {
608             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
609             'info_dict': {
610                 'id': '3377616',
611                 'ext': 'mp3',
612                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
613                 'description': 'md5:601cb790edd05908957dae8aaa866465',
614                 'upload_date': '20150220',
615             },
616         },
617         # jwplayer YouTube
618         {
619             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
620             'info_dict': {
621                 'id': 'Mrj4DVp2zeA',
622                 'ext': 'mp4',
623                 'upload_date': '20150212',
624                 'uploader': 'The National Archives UK',
625                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
626                 'uploader_id': 'NationalArchives08',
627                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
628             },
629         },
630         # rtl.nl embed
631         {
632             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
633             'playlist_mincount': 5,
634             'info_dict': {
635                 'id': 'aanslagen-kopenhagen',
636                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
637             }
638         },
639         # Zapiks embed
640         {
641             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
642             'info_dict': {
643                 'id': '118046',
644                 'ext': 'mp4',
645                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
646             }
647         },
648         # Kaltura embed
649         {
650             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
651             'info_dict': {
652                 'id': '1_eergr3h1',
653                 'ext': 'mp4',
654                 'upload_date': '20150226',
655                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
656                 'timestamp': int,
657                 'title': 'John Carlson Postgame 2/25/15',
658             },
659         },
660         # Eagle.Platform embed (generic URL)
661         {
662             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
663             'info_dict': {
664                 'id': '227304',
665                 'ext': 'mp4',
666                 'title': 'Навальный вышел на свободу',
667                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
668                 'thumbnail': 're:^https?://.*\.jpg$',
669                 'duration': 87,
670                 'view_count': int,
671                 'age_limit': 0,
672             },
673         },
674         # ClipYou (Eagle.Platform) embed (custom URL)
675         {
676             'url': 'http://muz-tv.ru/play/7129/',
677             'info_dict': {
678                 'id': '12820',
679                 'ext': 'mp4',
680                 'title': "'O Sole Mio",
681                 'thumbnail': 're:^https?://.*\.jpg$',
682                 'duration': 216,
683                 'view_count': int,
684             },
685         },
686         # Pladform embed
687         {
688             'url': 'http://muz-tv.ru/kinozal/view/7400/',
689             'info_dict': {
690                 'id': '100183293',
691                 'ext': 'mp4',
692                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
693                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
694                 'thumbnail': 're:^https?://.*\.jpg$',
695                 'duration': 694,
696                 'age_limit': 0,
697             },
698         },
699         # Playwire embed
700         {
701             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
702             'info_dict': {
703                 'id': '3519514',
704                 'ext': 'mp4',
705                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
706                 'thumbnail': 're:^https?://.*\.png$',
707                 'duration': 45.115,
708             },
709         },
710         # 5min embed
711         {
712             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
713             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
714             'info_dict': {
715                 'id': '518726732',
716                 'ext': 'mp4',
717                 'title': 'Facebook Creates "On This Day" | Crunch Report',
718             },
719         },
720         # SVT embed
721         {
722             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
723             'info_dict': {
724                 'id': '2900353',
725                 'ext': 'flv',
726                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
727                 'duration': 27,
728                 'age_limit': 0,
729             },
730         },
731         # Crooks and Liars embed
732         {
733             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
734             'info_dict': {
735                 'id': '8RUoRhRi',
736                 'ext': 'mp4',
737                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
738                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
739                 'timestamp': 1428207000,
740                 'upload_date': '20150405',
741                 'uploader': 'Heather',
742             },
743         },
744         # Crooks and Liars external embed
745         {
746             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
747             'info_dict': {
748                 'id': 'MTE3MjUtMzQ2MzA',
749                 'ext': 'mp4',
750                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
751                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
752                 'timestamp': 1265032391,
753                 'upload_date': '20100201',
754                 'uploader': 'Heather',
755             },
756         },
757         # NBC Sports vplayer embed
758         {
759             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
760             'info_dict': {
761                 'id': 'ln7x1qSThw4k',
762                 'ext': 'flv',
763                 'title': "PFT Live: New leader in the 'new-look' defense",
764                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
765             },
766         },
767         # UDN embed
768         {
769             'url': 'http://www.udn.com/news/story/7314/822787',
770             'md5': 'fd2060e988c326991037b9aff9df21a6',
771             'info_dict': {
772                 'id': '300346',
773                 'ext': 'mp4',
774                 'title': '中一中男師變性 全校師生力挺',
775                 'thumbnail': 're:^https?://.*\.jpg$',
776             }
777         },
778         # Ooyala embed
779         {
780             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
781             'info_dict': {
782                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
783                 'ext': 'mp4',
784                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
785                 'title': 'This is what separates the Excel masters from the wannabes',
786             },
787             'params': {
788                 # m3u8 downloads
789                 'skip_download': True,
790             }
791         },
792         # Contains a SMIL manifest
793         {
794             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
795             'info_dict': {
796                 'id': 'file',
797                 'ext': 'flv',
798                 'title': '+ Football: Lottery Champions League Europe',
799                 'uploader': 'www.telewebion.com',
800             },
801             'params': {
802                 # rtmpe downloads
803                 'skip_download': True,
804             }
805         },
806         # Brightcove URL in single quotes
807         {
808             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
809             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
810             'info_dict': {
811                 'id': '4255764656001',
812                 'ext': 'mp4',
813                 'title': 'SN Presents: Russell Martin, World Citizen',
814                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
815                 'uploader': 'Rogers Sportsnet',
816             },
817         },
818         # Dailymotion Cloud video
819         {
820             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
821             'md5': '49444254273501a64675a7e68c502681',
822             'info_dict': {
823                 'id': '5585de919473990de4bee11b',
824                 'ext': 'mp4',
825                 'title': 'Le débat',
826                 'thumbnail': 're:^https?://.*\.jpe?g$',
827             }
828         }
829     ]
830
831     def report_following_redirect(self, new_url):
832         """Report information extraction."""
833         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
834
835     def _extract_rss(self, url, video_id, doc):
836         playlist_title = doc.find('./channel/title').text
837         playlist_desc_el = doc.find('./channel/description')
838         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
839
840         entries = []
841         for it in doc.findall('./channel/item'):
842             next_url = xpath_text(it, 'link', fatal=False)
843             if not next_url:
844                 enclosure_nodes = it.findall('./enclosure')
845                 for e in enclosure_nodes:
846                     next_url = e.attrib.get('url')
847                     if next_url:
848                         break
849
850             if not next_url:
851                 continue
852
853             entries.append({
854                 '_type': 'url',
855                 'url': next_url,
856                 'title': it.find('title').text,
857             })
858
859         return {
860             '_type': 'playlist',
861             'id': url,
862             'title': playlist_title,
863             'description': playlist_desc,
864             'entries': entries,
865         }
866
867     def _extract_camtasia(self, url, video_id, webpage):
868         """ Returns None if no camtasia video can be found. """
869
870         camtasia_cfg = self._search_regex(
871             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
872             webpage, 'camtasia configuration file', default=None)
873         if camtasia_cfg is None:
874             return None
875
876         title = self._html_search_meta('DC.title', webpage, fatal=True)
877
878         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
879         camtasia_cfg = self._download_xml(
880             camtasia_url, video_id,
881             note='Downloading camtasia configuration',
882             errnote='Failed to download camtasia configuration')
883         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
884
885         entries = []
886         for n in fileset_node.getchildren():
887             url_n = n.find('./uri')
888             if url_n is None:
889                 continue
890
891             entries.append({
892                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
893                 'title': '%s - %s' % (title, n.tag),
894                 'url': compat_urlparse.urljoin(url, url_n.text),
895                 'duration': float_or_none(n.find('./duration').text),
896             })
897
898         return {
899             '_type': 'playlist',
900             'entries': entries,
901             'title': title,
902         }
903
904     def _real_extract(self, url):
905         if url.startswith('//'):
906             return {
907                 '_type': 'url',
908                 'url': self.http_scheme() + url,
909             }
910
911         parsed_url = compat_urlparse.urlparse(url)
912         if not parsed_url.scheme:
913             default_search = self._downloader.params.get('default_search')
914             if default_search is None:
915                 default_search = 'fixup_error'
916
917             if default_search in ('auto', 'auto_warning', 'fixup_error'):
918                 if '/' in url:
919                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
920                     return self.url_result('http://' + url)
921                 elif default_search != 'fixup_error':
922                     if default_search == 'auto_warning':
923                         if re.match(r'^(?:url|URL)$', url):
924                             raise ExtractorError(
925                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
926                                 expected=True)
927                         else:
928                             self._downloader.report_warning(
929                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
930                     return self.url_result('ytsearch:' + url)
931
932             if default_search in ('error', 'fixup_error'):
933                 raise ExtractorError(
934                     '%r is not a valid URL. '
935                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
936                     % (url, url), expected=True)
937             else:
938                 if ':' not in default_search:
939                     default_search += ':'
940                 return self.url_result(default_search + url)
941
942         url, smuggled_data = unsmuggle_url(url)
943         force_videoid = None
944         is_intentional = smuggled_data and smuggled_data.get('to_generic')
945         if smuggled_data and 'force_videoid' in smuggled_data:
946             force_videoid = smuggled_data['force_videoid']
947             video_id = force_videoid
948         else:
949             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
950
951         self.to_screen('%s: Requesting header' % video_id)
952
953         head_req = HEADRequest(url)
954         head_response = self._request_webpage(
955             head_req, video_id,
956             note=False, errnote='Could not send HEAD request to %s' % url,
957             fatal=False)
958
959         if head_response is not False:
960             # Check for redirect
961             new_url = head_response.geturl()
962             if url != new_url:
963                 self.report_following_redirect(new_url)
964                 if force_videoid:
965                     new_url = smuggle_url(
966                         new_url, {'force_videoid': force_videoid})
967                 return self.url_result(new_url)
968
969         full_response = None
970         if head_response is False:
971             request = compat_urllib_request.Request(url)
972             request.add_header('Accept-Encoding', '*')
973             full_response = self._request_webpage(request, video_id)
974             head_response = full_response
975
976         # Check for direct link to a video
977         content_type = head_response.headers.get('Content-Type', '')
978         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
979         if m:
980             upload_date = unified_strdate(
981                 head_response.headers.get('Last-Modified'))
982             return {
983                 'id': video_id,
984                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
985                 'direct': True,
986                 'formats': [{
987                     'format_id': m.group('format_id'),
988                     'url': url,
989                     'vcodec': 'none' if m.group('type') == 'audio' else None
990                 }],
991                 'upload_date': upload_date,
992             }
993
994         if not self._downloader.params.get('test', False) and not is_intentional:
995             self._downloader.report_warning('Falling back on generic information extractor.')
996
997         if not full_response:
998             request = compat_urllib_request.Request(url)
999             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1000             # making it impossible to download only chunk of the file (yet we need only 512kB to
1001             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1002             # that will always result in downloading the whole file that is not desirable.
1003             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1004             # to accept raw bytes and being able to download only a chunk.
1005             # It may probably better to solve this by checking Content-Type for application/octet-stream
1006             # after HEAD request finishes, but not sure if we can rely on this.
1007             request.add_header('Accept-Encoding', '*')
1008             full_response = self._request_webpage(request, video_id)
1009
1010         # Maybe it's a direct link to a video?
1011         # Be careful not to download the whole thing!
1012         first_bytes = full_response.read(512)
1013         if not is_html(first_bytes):
1014             self._downloader.report_warning(
1015                 'URL could be a direct video link, returning it as such.')
1016             upload_date = unified_strdate(
1017                 head_response.headers.get('Last-Modified'))
1018             return {
1019                 'id': video_id,
1020                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1021                 'direct': True,
1022                 'url': url,
1023                 'upload_date': upload_date,
1024             }
1025
1026         webpage = self._webpage_read_content(
1027             full_response, url, video_id, prefix=first_bytes)
1028
1029         self.report_extraction(video_id)
1030
1031         # Is it an RSS feed?
1032         try:
1033             doc = parse_xml(webpage)
1034             if doc.tag == 'rss':
1035                 return self._extract_rss(url, video_id, doc)
1036         except compat_xml_parse_error:
1037             pass
1038
1039         # Is it a Camtasia project?
1040         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1041         if camtasia_res is not None:
1042             return camtasia_res
1043
1044         # Sometimes embedded video player is hidden behind percent encoding
1045         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1046         # Unescaping the whole page allows to handle those cases in a generic way
1047         webpage = compat_urllib_parse.unquote(webpage)
1048
1049         # it's tempting to parse this further, but you would
1050         # have to take into account all the variations like
1051         #   Video Title - Site Name
1052         #   Site Name | Video Title
1053         #   Video Title - Tagline | Site Name
1054         # and so on and so forth; it's just not practical
1055         video_title = self._html_search_regex(
1056             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1057             default='video')
1058
1059         # Try to detect age limit automatically
1060         age_limit = self._rta_search(webpage)
1061         # And then there are the jokers who advertise that they use RTA,
1062         # but actually don't.
1063         AGE_LIMIT_MARKERS = [
1064             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1065         ]
1066         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1067             age_limit = 18
1068
1069         # video uploader is domain name
1070         video_uploader = self._search_regex(
1071             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1072
1073         # Helper method
1074         def _playlist_from_matches(matches, getter=None, ie=None):
1075             urlrs = orderedSet(
1076                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1077                 for m in matches)
1078             return self.playlist_result(
1079                 urlrs, playlist_id=video_id, playlist_title=video_title)
1080
1081         # Look for BrightCove:
1082         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1083         if bc_urls:
1084             self.to_screen('Brightcove video detected.')
1085             entries = [{
1086                 '_type': 'url',
1087                 'url': smuggle_url(bc_url, {'Referer': url}),
1088                 'ie_key': 'Brightcove'
1089             } for bc_url in bc_urls]
1090
1091             return {
1092                 '_type': 'playlist',
1093                 'title': video_title,
1094                 'id': video_id,
1095                 'entries': entries,
1096             }
1097
1098         # Look for embedded rtl.nl player
1099         matches = re.findall(
1100             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1101             webpage)
1102         if matches:
1103             return _playlist_from_matches(matches, ie='RtlNl')
1104
1105         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1106         if vimeo_url is not None:
1107             return self.url_result(vimeo_url)
1108
1109         # Look for embedded YouTube player
1110         matches = re.findall(r'''(?x)
1111             (?:
1112                 <iframe[^>]+?src=|
1113                 data-video-url=|
1114                 <embed[^>]+?src=|
1115                 embedSWF\(?:\s*|
1116                 new\s+SWFObject\(
1117             )
1118             (["\'])
1119                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1120                 (?:embed|v|p)/.+?)
1121             \1''', webpage)
1122         if matches:
1123             return _playlist_from_matches(
1124                 matches, lambda m: unescapeHTML(m[1]))
1125
1126         # Look for lazyYT YouTube embed
1127         matches = re.findall(
1128             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1129         if matches:
1130             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1131
1132         # Look for embedded Dailymotion player
1133         matches = re.findall(
1134             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1135         if matches:
1136             return _playlist_from_matches(
1137                 matches, lambda m: unescapeHTML(m[1]))
1138
1139         # Look for embedded Dailymotion playlist player (#3822)
1140         m = re.search(
1141             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1142         if m:
1143             playlists = re.findall(
1144                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1145             if playlists:
1146                 return _playlist_from_matches(
1147                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1148
1149         # Look for embedded Wistia player
1150         match = re.search(
1151             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1152         if match:
1153             embed_url = self._proto_relative_url(
1154                 unescapeHTML(match.group('url')))
1155             return {
1156                 '_type': 'url_transparent',
1157                 'url': embed_url,
1158                 'ie_key': 'Wistia',
1159                 'uploader': video_uploader,
1160                 'title': video_title,
1161                 'id': video_id,
1162             }
1163
1164         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1165         if match:
1166             return {
1167                 '_type': 'url_transparent',
1168                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1169                 'ie_key': 'Wistia',
1170                 'uploader': video_uploader,
1171                 'title': video_title,
1172                 'id': match.group('id')
1173             }
1174
1175         # Look for embedded blip.tv player
1176         bliptv_url = BlipTVIE._extract_url(webpage)
1177         if bliptv_url:
1178             return self.url_result(bliptv_url, 'BlipTV')
1179
1180         # Look for SVT player
1181         svt_url = SVTIE._extract_url(webpage)
1182         if svt_url:
1183             return self.url_result(svt_url, 'SVT')
1184
1185         # Look for embedded condenast player
1186         matches = re.findall(
1187             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1188             webpage)
1189         if matches:
1190             return {
1191                 '_type': 'playlist',
1192                 'entries': [{
1193                     '_type': 'url',
1194                     'ie_key': 'CondeNast',
1195                     'url': ma,
1196                 } for ma in matches],
1197                 'title': video_title,
1198                 'id': video_id,
1199             }
1200
1201         # Look for Bandcamp pages with custom domain
1202         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1203         if mobj is not None:
1204             burl = unescapeHTML(mobj.group(1))
1205             # Don't set the extractor because it can be a track url or an album
1206             return self.url_result(burl)
1207
1208         # Look for embedded Vevo player
1209         mobj = re.search(
1210             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1211         if mobj is not None:
1212             return self.url_result(mobj.group('url'))
1213
1214         # Look for embedded Viddler player
1215         mobj = re.search(
1216             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1217             webpage)
1218         if mobj is not None:
1219             return self.url_result(mobj.group('url'))
1220
1221         # Look for NYTimes player
1222         mobj = re.search(
1223             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1224             webpage)
1225         if mobj is not None:
1226             return self.url_result(mobj.group('url'))
1227
1228         # Look for Libsyn player
1229         mobj = re.search(
1230             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1231         if mobj is not None:
1232             return self.url_result(mobj.group('url'))
1233
1234         # Look for Ooyala videos
1235         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1236                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1237                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1238                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1239         if mobj is not None:
1240             return OoyalaIE._build_url_result(mobj.group('ec'))
1241
1242         # Look for multiple Ooyala embeds on SBN network websites
1243         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1244         if mobj is not None:
1245             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1246             if embeds:
1247                 return _playlist_from_matches(
1248                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1249
1250         # Look for Aparat videos
1251         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1252         if mobj is not None:
1253             return self.url_result(mobj.group(1), 'Aparat')
1254
1255         # Look for MPORA videos
1256         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1257         if mobj is not None:
1258             return self.url_result(mobj.group(1), 'Mpora')
1259
1260         # Look for embedded NovaMov-based player
1261         mobj = re.search(
1262             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1263                     (?P<url>http://(?:(?:embed|www)\.)?
1264                         (?:novamov\.com|
1265                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1266                            videoweed\.(?:es|com)|
1267                            movshare\.(?:net|sx|ag)|
1268                            divxstage\.(?:eu|net|ch|co|at|ag))
1269                         /embed\.php.+?)\1''', webpage)
1270         if mobj is not None:
1271             return self.url_result(mobj.group('url'))
1272
1273         # Look for embedded Facebook player
1274         mobj = re.search(
1275             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1276         if mobj is not None:
1277             return self.url_result(mobj.group('url'), 'Facebook')
1278
1279         # Look for embedded VK player
1280         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1281         if mobj is not None:
1282             return self.url_result(mobj.group('url'), 'VK')
1283
1284         # Look for embedded ivi player
1285         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1286         if mobj is not None:
1287             return self.url_result(mobj.group('url'), 'Ivi')
1288
1289         # Look for embedded Huffington Post player
1290         mobj = re.search(
1291             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1292         if mobj is not None:
1293             return self.url_result(mobj.group('url'), 'HuffPost')
1294
1295         # Look for embed.ly
1296         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1297         if mobj is not None:
1298             return self.url_result(mobj.group('url'))
1299         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1300         if mobj is not None:
1301             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1302
1303         # Look for funnyordie embed
1304         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1305         if matches:
1306             return _playlist_from_matches(
1307                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1308
1309         # Look for BBC iPlayer embed
1310         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1311         if matches:
1312             return _playlist_from_matches(matches, ie='BBCCoUk')
1313
1314         # Look for embedded RUTV player
1315         rutv_url = RUTVIE._extract_url(webpage)
1316         if rutv_url:
1317             return self.url_result(rutv_url, 'RUTV')
1318
1319         # Look for embedded TVC player
1320         tvc_url = TVCIE._extract_url(webpage)
1321         if tvc_url:
1322             return self.url_result(tvc_url, 'TVC')
1323
1324         # Look for embedded SportBox player
1325         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1326         if sportbox_urls:
1327             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1328
1329         # Look for embedded PornHub player
1330         pornhub_url = PornHubIE._extract_url(webpage)
1331         if pornhub_url:
1332             return self.url_result(pornhub_url, 'PornHub')
1333
1334         # Look for embedded Tvigle player
1335         mobj = re.search(
1336             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1337         if mobj is not None:
1338             return self.url_result(mobj.group('url'), 'Tvigle')
1339
1340         # Look for embedded TED player
1341         mobj = re.search(
1342             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1343         if mobj is not None:
1344             return self.url_result(mobj.group('url'), 'TED')
1345
1346         # Look for embedded Ustream videos
1347         mobj = re.search(
1348             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1349         if mobj is not None:
1350             return self.url_result(mobj.group('url'), 'Ustream')
1351
1352         # Look for embedded arte.tv player
1353         mobj = re.search(
1354             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1355             webpage)
1356         if mobj is not None:
1357             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1358
1359         # Look for embedded smotri.com player
1360         smotri_url = SmotriIE._extract_url(webpage)
1361         if smotri_url:
1362             return self.url_result(smotri_url, 'Smotri')
1363
1364         # Look for embeded soundcloud player
1365         mobj = re.search(
1366             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1367             webpage)
1368         if mobj is not None:
1369             url = unescapeHTML(mobj.group('url'))
1370             return self.url_result(url)
1371
1372         # Look for embedded vulture.com player
1373         mobj = re.search(
1374             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1375             webpage)
1376         if mobj is not None:
1377             url = unescapeHTML(mobj.group('url'))
1378             return self.url_result(url, ie='Vulture')
1379
1380         # Look for embedded mtvservices player
1381         mobj = re.search(
1382             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1383             webpage)
1384         if mobj is not None:
1385             url = unescapeHTML(mobj.group('url'))
1386             return self.url_result(url, ie='MTVServicesEmbedded')
1387
1388         # Look for embedded yahoo player
1389         mobj = re.search(
1390             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1391             webpage)
1392         if mobj is not None:
1393             return self.url_result(mobj.group('url'), 'Yahoo')
1394
1395         # Look for embedded sbs.com.au player
1396         mobj = re.search(
1397             r'''(?x)
1398             (?:
1399                 <meta\s+property="og:video"\s+content=|
1400                 <iframe[^>]+?src=
1401             )
1402             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1403             webpage)
1404         if mobj is not None:
1405             return self.url_result(mobj.group('url'), 'SBS')
1406
1407         # Look for embedded Cinchcast player
1408         mobj = re.search(
1409             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1410             webpage)
1411         if mobj is not None:
1412             return self.url_result(mobj.group('url'), 'Cinchcast')
1413
1414         mobj = re.search(
1415             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1416             webpage)
1417         if not mobj:
1418             mobj = re.search(
1419                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1420                 webpage)
1421         if mobj is not None:
1422             return self.url_result(mobj.group('url'), 'MLB')
1423
1424         mobj = re.search(
1425             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1426             webpage)
1427         if mobj is not None:
1428             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1429
1430         mobj = re.search(
1431             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1432             webpage)
1433         if mobj is not None:
1434             return self.url_result(mobj.group('url'), 'Livestream')
1435
1436         # Look for Zapiks embed
1437         mobj = re.search(
1438             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1439         if mobj is not None:
1440             return self.url_result(mobj.group('url'), 'Zapiks')
1441
1442         # Look for Kaltura embeds
1443         mobj = re.search(
1444             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1445         if mobj is not None:
1446             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1447
1448         # Look for Eagle.Platform embeds
1449         mobj = re.search(
1450             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1451         if mobj is not None:
1452             return self.url_result(mobj.group('url'), 'EaglePlatform')
1453
1454         # Look for ClipYou (uses Eagle.Platform) embeds
1455         mobj = re.search(
1456             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1457         if mobj is not None:
1458             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1459
1460         # Look for Pladform embeds
1461         mobj = re.search(
1462             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1463         if mobj is not None:
1464             return self.url_result(mobj.group('url'), 'Pladform')
1465
1466         # Look for Playwire embeds
1467         mobj = re.search(
1468             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1469         if mobj is not None:
1470             return self.url_result(mobj.group('url'))
1471
1472         # Look for 5min embeds
1473         mobj = re.search(
1474             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1475         if mobj is not None:
1476             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1477
1478         # Look for Crooks and Liars embeds
1479         mobj = re.search(
1480             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1481         if mobj is not None:
1482             return self.url_result(mobj.group('url'))
1483
1484         # Look for NBC Sports VPlayer embeds
1485         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1486         if nbc_sports_url:
1487             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1488
1489         # Look for UDN embeds
1490         mobj = re.search(
1491             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1492         if mobj is not None:
1493             return self.url_result(
1494                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1495
1496         # Look for Senate ISVP iframe
1497         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1498         if senate_isvp_url:
1499             return self.url_result(senate_isvp_url, 'SenateISVP')
1500
1501         # Look for Dailymotion Cloud videos
1502         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1503         if dmcloud_url:
1504             return self.url_result(dmcloud_url, 'DailymotionCloud')
1505
1506         def check_video(vurl):
1507             if YoutubeIE.suitable(vurl):
1508                 return True
1509             vpath = compat_urlparse.urlparse(vurl).path
1510             vext = determine_ext(vpath)
1511             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1512
1513         def filter_video(urls):
1514             return list(filter(check_video, urls))
1515
1516         # Start with something easy: JW Player in SWFObject
1517         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1518         if not found:
1519             # Look for gorilla-vid style embedding
1520             found = filter_video(re.findall(r'''(?sx)
1521                 (?:
1522                     jw_plugins|
1523                     JWPlayerOptions|
1524                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1525                 )
1526                 .*?
1527                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1528         if not found:
1529             # Broaden the search a little bit
1530             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1531         if not found:
1532             # Broaden the findall a little bit: JWPlayer JS loader
1533             found = filter_video(re.findall(
1534                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1535         if not found:
1536             # Flow player
1537             found = filter_video(re.findall(r'''(?xs)
1538                 flowplayer\("[^"]+",\s*
1539                     \{[^}]+?\}\s*,
1540                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1541                         ["']?url["']?\s*:\s*["']([^"']+)["']
1542             ''', webpage))
1543         if not found:
1544             # Cinerama player
1545             found = re.findall(
1546                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1547         if not found:
1548             # Try to find twitter cards info
1549             found = filter_video(re.findall(
1550                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1551         if not found:
1552             # We look for Open Graph info:
1553             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1554             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1555             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1556             if m_video_type is not None:
1557                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1558         if not found:
1559             # HTML5 video
1560             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1561         if not found:
1562             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1563             found = re.search(
1564                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1565                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1566                 webpage)
1567             if not found:
1568                 # Look also in Refresh HTTP header
1569                 refresh_header = head_response.headers.get('Refresh')
1570                 if refresh_header:
1571                     found = re.search(REDIRECT_REGEX, refresh_header)
1572             if found:
1573                 new_url = compat_urlparse.urljoin(url, found.group(1))
1574                 self.report_following_redirect(new_url)
1575                 return {
1576                     '_type': 'url',
1577                     'url': new_url,
1578                 }
1579         if not found:
1580             raise UnsupportedError(url)
1581
1582         entries = []
1583         for video_url in found:
1584             video_url = compat_urlparse.urljoin(url, video_url)
1585             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1586
1587             # Sometimes, jwplayer extraction will result in a YouTube URL
1588             if YoutubeIE.suitable(video_url):
1589                 entries.append(self.url_result(video_url, 'Youtube'))
1590                 continue
1591
1592             # here's a fun little line of code for you:
1593             video_id = os.path.splitext(video_id)[0]
1594
1595             if determine_ext(video_url) == 'smil':
1596                 entries.append({
1597                     'id': video_id,
1598                     'formats': self._extract_smil_formats(video_url, video_id),
1599                     'uploader': video_uploader,
1600                     'title': video_title,
1601                     'age_limit': age_limit,
1602                 })
1603             else:
1604                 entries.append({
1605                     'id': video_id,
1606                     'url': video_url,
1607                     'uploader': video_uploader,
1608                     'title': video_title,
1609                     'age_limit': age_limit,
1610                 })
1611
1612         if len(entries) == 1:
1613             return entries[0]
1614         else:
1615             for num, e in enumerate(entries, start=1):
1616                 # 'url' results don't have a title
1617                 if e.get('title') is not None:
1618                     e['title'] = '%s (%d)' % (e['title'], num)
1619             return {
1620                 '_type': 'playlist',
1621                 'entries': entries,
1622             }