Merge branch 'hlintala-tvc'
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urllib_parse_unquote,
13     compat_urllib_request,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     parse_xml,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import BrightcoveIE
34 from .nbc import NBCSportsVPlayerIE
35 from .ooyala import OoyalaIE
36 from .rutv import RUTVIE
37 from .tvc import TVCIE
38 from .sportbox import SportBoxEmbedIE
39 from .smotri import SmotriIE
40 from .condenast import CondeNastIE
41 from .udn import UDNEmbedIE
42 from .senateisvp import SenateISVPIE
43 from .bliptv import BlipTVIE
44 from .svt import SVTIE
45
46
47 class GenericIE(InfoExtractor):
48     IE_DESC = 'Generic downloader that works on some sites'
49     _VALID_URL = r'.*'
50     IE_NAME = 'generic'
51     _TESTS = [
52         # Direct link to a video
53         {
54             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
55             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
56             'info_dict': {
57                 'id': 'trailer',
58                 'ext': 'mp4',
59                 'title': 'trailer',
60                 'upload_date': '20100513',
61             }
62         },
63         # Direct link to media delivered compressed (until Accept-Encoding is *)
64         {
65             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
66             'md5': '128c42e68b13950268b648275386fc74',
67             'info_dict': {
68                 'id': 'FictionJunction-Parallel_Hearts',
69                 'ext': 'flac',
70                 'title': 'FictionJunction-Parallel_Hearts',
71                 'upload_date': '20140522',
72             },
73             'expected_warnings': [
74                 'URL could be a direct video link, returning it as such.'
75             ]
76         },
77         # Direct download with broken HEAD
78         {
79             'url': 'http://ai-radio.org:8000/radio.opus',
80             'info_dict': {
81                 'id': 'radio',
82                 'ext': 'opus',
83                 'title': 'radio',
84             },
85             'params': {
86                 'skip_download': True,  # infinite live stream
87             },
88             'expected_warnings': [
89                 r'501.*Not Implemented'
90             ],
91         },
92         # Direct link with incorrect MIME type
93         {
94             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
95             'md5': '4ccbebe5f36706d85221f204d7eb5913',
96             'info_dict': {
97                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
98                 'id': '5_Lennart_Poettering_-_Systemd',
99                 'ext': 'webm',
100                 'title': '5_Lennart_Poettering_-_Systemd',
101                 'upload_date': '20141120',
102             },
103             'expected_warnings': [
104                 'URL could be a direct video link, returning it as such.'
105             ]
106         },
107         # RSS feed
108         {
109             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
110             'info_dict': {
111                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
112                 'title': 'Zero Punctuation',
113                 'description': 're:.*groundbreaking video review series.*'
114             },
115             'playlist_mincount': 11,
116         },
117         # RSS feed with enclosure
118         {
119             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
120             'info_dict': {
121                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
122                 'ext': 'm4v',
123                 'upload_date': '20150228',
124                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
125             }
126         },
127         # google redirect
128         {
129             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
130             'info_dict': {
131                 'id': 'cmQHVoWB5FY',
132                 'ext': 'mp4',
133                 'upload_date': '20130224',
134                 'uploader_id': 'TheVerge',
135                 'description': 're:^Chris Ziegler takes a look at the\.*',
136                 'uploader': 'The Verge',
137                 'title': 'First Firefox OS phones side-by-side',
138             },
139             'params': {
140                 'skip_download': False,
141             }
142         },
143         {
144             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
145             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
146             'info_dict': {
147                 'id': '13601338388002',
148                 'ext': 'mp4',
149                 'uploader': 'www.hodiho.fr',
150                 'title': 'R\u00e9gis plante sa Jeep',
151             }
152         },
153         # bandcamp page with custom domain
154         {
155             'add_ie': ['Bandcamp'],
156             'url': 'http://bronyrock.com/track/the-pony-mash',
157             'info_dict': {
158                 'id': '3235767654',
159                 'ext': 'mp3',
160                 'title': 'The Pony Mash',
161                 'uploader': 'M_Pallante',
162             },
163             'skip': 'There is a limit of 200 free downloads / month for the test song',
164         },
165         # embedded brightcove video
166         # it also tests brightcove videos that need to set the 'Referer' in the
167         # http requests
168         {
169             'add_ie': ['Brightcove'],
170             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
171             'info_dict': {
172                 'id': '2765128793001',
173                 'ext': 'mp4',
174                 'title': 'Le cours de bourse : l’analyse technique',
175                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
176                 'uploader': 'BFM BUSINESS',
177             },
178             'params': {
179                 'skip_download': True,
180             },
181         },
182         {
183             # https://github.com/rg3/youtube-dl/issues/2253
184             'url': 'http://bcove.me/i6nfkrc3',
185             'md5': '0ba9446db037002366bab3b3eb30c88c',
186             'info_dict': {
187                 'id': '3101154703001',
188                 'ext': 'mp4',
189                 'title': 'Still no power',
190                 'uploader': 'thestar.com',
191                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
192             },
193             'add_ie': ['Brightcove'],
194         },
195         {
196             'url': 'http://www.championat.com/video/football/v/87/87499.html',
197             'md5': 'fb973ecf6e4a78a67453647444222983',
198             'info_dict': {
199                 'id': '3414141473001',
200                 'ext': 'mp4',
201                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
202                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
203                 'uploader': 'Championat',
204             },
205         },
206         {
207             # https://github.com/rg3/youtube-dl/issues/3541
208             'add_ie': ['Brightcove'],
209             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
210             'info_dict': {
211                 'id': '3866516442001',
212                 'ext': 'mp4',
213                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
214                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
215                 'uploader': 'SBS Broadcasting',
216             },
217             'skip': 'Restricted to Netherlands',
218             'params': {
219                 'skip_download': True,  # m3u8 download
220             },
221         },
222         # ooyala video
223         {
224             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
225             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
226             'info_dict': {
227                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
228                 'ext': 'mp4',
229                 'title': '2cc213299525360.mov',  # that's what we get
230             },
231             'add_ie': ['Ooyala'],
232         },
233         # multiple ooyala embeds on SBN network websites
234         {
235             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
236             'info_dict': {
237                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
238                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
239             },
240             'playlist_mincount': 3,
241             'params': {
242                 'skip_download': True,
243             },
244             'add_ie': ['Ooyala'],
245         },
246         # embed.ly video
247         {
248             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
249             'info_dict': {
250                 'id': '9ODmcdjQcHQ',
251                 'ext': 'mp4',
252                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
253                 'upload_date': '20140225',
254                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
255                 'uploader': 'Tested',
256                 'uploader_id': 'testedcom',
257             },
258             # No need to test YoutubeIE here
259             'params': {
260                 'skip_download': True,
261             },
262         },
263         # funnyordie embed
264         {
265             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
266             'info_dict': {
267                 'id': '18e820ec3f',
268                 'ext': 'mp4',
269                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
270                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
271             },
272         },
273         # BBC iPlayer embeds
274         {
275             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
276             'info_dict': {
277                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
278             },
279             'playlist_mincount': 18,
280         },
281         # RUTV embed
282         {
283             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
284             'info_dict': {
285                 'id': '776940',
286                 'ext': 'mp4',
287                 'title': 'Охотское море стало целиком российским',
288                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
289             },
290             'params': {
291                 # m3u8 download
292                 'skip_download': True,
293             },
294         },
295         # TVC embed
296         {
297             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
298             'info_dict': {
299                 'id': '55304',
300                 'ext': 'mp4',
301                 'title': 'Дошкольное воспитание',
302             },
303         },
304         # SportBox embed
305         {
306             'url': 'http://www.vestifinance.ru/articles/25753',
307             'info_dict': {
308                 'id': '25753',
309                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
310             },
311             'playlist': [{
312                 'info_dict': {
313                     'id': '370908',
314                     'title': 'Госзаказ. День 3',
315                     'ext': 'mp4',
316                 }
317             }, {
318                 'info_dict': {
319                     'id': '370905',
320                     'title': 'Госзаказ. День 2',
321                     'ext': 'mp4',
322                 }
323             }, {
324                 'info_dict': {
325                     'id': '370902',
326                     'title': 'Госзаказ. День 1',
327                     'ext': 'mp4',
328                 }
329             }],
330             'params': {
331                 # m3u8 download
332                 'skip_download': True,
333             },
334         },
335         # Embedded TED video
336         {
337             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
338             'md5': '65fdff94098e4a607385a60c5177c638',
339             'info_dict': {
340                 'id': '1969',
341                 'ext': 'mp4',
342                 'title': 'Hidden miracles of the natural world',
343                 'uploader': 'Louie Schwartzberg',
344                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
345             }
346         },
347         # Embedded Ustream video
348         {
349             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
350             'md5': '27b99cdb639c9b12a79bca876a073417',
351             'info_dict': {
352                 'id': '45734260',
353                 'ext': 'flv',
354                 'uploader': 'AU SPA:  The NSA and Privacy',
355                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
356             }
357         },
358         # nowvideo embed hidden behind percent encoding
359         {
360             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
361             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
362             'info_dict': {
363                 'id': '06e53103ca9aa',
364                 'ext': 'flv',
365                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
366                 'description': 'No description',
367             },
368         },
369         # arte embed
370         {
371             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
372             'md5': '7653032cbb25bf6c80d80f217055fa43',
373             'info_dict': {
374                 'id': '048195-004_PLUS7-F',
375                 'ext': 'flv',
376                 'title': 'X:enius',
377                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
378                 'upload_date': '20140320',
379             },
380             'params': {
381                 'skip_download': 'Requires rtmpdump'
382             }
383         },
384         # Condé Nast embed
385         {
386             'url': 'http://www.wired.com/2014/04/honda-asimo/',
387             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
388             'info_dict': {
389                 'id': '53501be369702d3275860000',
390                 'ext': 'mp4',
391                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
392             }
393         },
394         # Dailymotion embed
395         {
396             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
397             'md5': '441aeeb82eb72c422c7f14ec533999cd',
398             'info_dict': {
399                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
400                 'ext': 'mp4',
401                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
402                 'uploader': 'Spi0n',
403             },
404             'add_ie': ['Dailymotion'],
405         },
406         # YouTube embed
407         {
408             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
409             'info_dict': {
410                 'id': 'FXRb4ykk4S0',
411                 'ext': 'mp4',
412                 'title': 'The NBL Auction 2014',
413                 'uploader': 'BADMINTON England',
414                 'uploader_id': 'BADMINTONEvents',
415                 'upload_date': '20140603',
416                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
417             },
418             'add_ie': ['Youtube'],
419             'params': {
420                 'skip_download': True,
421             }
422         },
423         # MTVServices embed
424         {
425             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
426             'md5': '35727f82f58c76d996fc188f9755b0d5',
427             'info_dict': {
428                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
429                 'ext': 'mp4',
430                 'title': 'Review',
431                 'description': 'Mario\'s life in the fast lane has never looked so good.',
432             },
433         },
434         # YouTube embed via <data-embed-url="">
435         {
436             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
437             'info_dict': {
438                 'id': '4vAffPZIT44',
439                 'ext': 'mp4',
440                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
441                 'uploader': 'Gameloft',
442                 'uploader_id': 'gameloft',
443                 'upload_date': '20140828',
444                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
445             },
446             'params': {
447                 'skip_download': True,
448             }
449         },
450         # Camtasia studio
451         {
452             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
453             'playlist': [{
454                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
455                 'info_dict': {
456                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
457                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
458                     'ext': 'flv',
459                     'duration': 2235.90,
460                 }
461             }, {
462                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
463                 'info_dict': {
464                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
465                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
466                     'ext': 'flv',
467                     'duration': 2235.93,
468                 }
469             }],
470             'info_dict': {
471                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
472             }
473         },
474         # Flowplayer
475         {
476             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
477             'md5': '9d65602bf31c6e20014319c7d07fba27',
478             'info_dict': {
479                 'id': '5123ea6d5e5a7',
480                 'ext': 'mp4',
481                 'age_limit': 18,
482                 'uploader': 'www.handjobhub.com',
483                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
484             }
485         },
486         # Multiple brightcove videos
487         # https://github.com/rg3/youtube-dl/issues/2283
488         {
489             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
490             'info_dict': {
491                 'id': 'always-never',
492                 'title': 'Always / Never - The New Yorker',
493             },
494             'playlist_count': 3,
495             'params': {
496                 'extract_flat': False,
497                 'skip_download': True,
498             }
499         },
500         # MLB embed
501         {
502             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
503             'md5': '96f09a37e44da40dd083e12d9a683327',
504             'info_dict': {
505                 'id': '33322633',
506                 'ext': 'mp4',
507                 'title': 'Ump changes call to ball',
508                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
509                 'duration': 48,
510                 'timestamp': 1401537900,
511                 'upload_date': '20140531',
512                 'thumbnail': 're:^https?://.*\.jpg$',
513             },
514         },
515         # Wistia embed
516         {
517             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
518             'md5': '8788b683c777a5cf25621eaf286d0c23',
519             'info_dict': {
520                 'id': '1cfaf6b7ea',
521                 'ext': 'mov',
522                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
523                 'duration': 643.0,
524                 'filesize': 182808282,
525                 'uploader': 'education-portal.com',
526             },
527         },
528         {
529             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
530             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
531             'info_dict': {
532                 'id': 'uxjb0lwrcz',
533                 'ext': 'mp4',
534                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
535                 'duration': 1715.0,
536                 'uploader': 'thoughtworks.wistia.com',
537             },
538         },
539         # Soundcloud embed
540         {
541             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
542             'info_dict': {
543                 'id': '174391317',
544                 'ext': 'mp3',
545                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
546                 'uploader': 'Sophos Security',
547                 'title': 'Chet Chat 171 - Oct 29, 2014',
548                 'upload_date': '20141029',
549             }
550         },
551         # Livestream embed
552         {
553             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
554             'info_dict': {
555                 'id': '67864563',
556                 'ext': 'flv',
557                 'upload_date': '20141112',
558                 'title': 'Rosetta #CometLanding webcast HL 10',
559             }
560         },
561         # LazyYT
562         {
563             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
564             'info_dict': {
565                 'id': '1986',
566                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
567             },
568             'playlist_mincount': 2,
569         },
570         # Cinchcast embed
571         {
572             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
573             'info_dict': {
574                 'id': '7141703',
575                 'ext': 'mp3',
576                 'upload_date': '20141126',
577                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
578             }
579         },
580         # Cinerama player
581         {
582             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
583             'info_dict': {
584                 'id': '730m_DandD_1901_512k',
585                 'ext': 'mp4',
586                 'uploader': 'www.abc.net.au',
587                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
588             }
589         },
590         # embedded viddler video
591         {
592             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
593             'info_dict': {
594                 'id': '4d03aad9',
595                 'ext': 'mp4',
596                 'uploader': 'deadspin',
597                 'title': 'WALL-TO-GORTAT',
598                 'timestamp': 1422285291,
599                 'upload_date': '20150126',
600             },
601             'add_ie': ['Viddler'],
602         },
603         # Libsyn embed
604         {
605             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
606             'info_dict': {
607                 'id': '3377616',
608                 'ext': 'mp3',
609                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
610                 'description': 'md5:601cb790edd05908957dae8aaa866465',
611                 'upload_date': '20150220',
612             },
613         },
614         # jwplayer YouTube
615         {
616             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
617             'info_dict': {
618                 'id': 'Mrj4DVp2zeA',
619                 'ext': 'mp4',
620                 'upload_date': '20150212',
621                 'uploader': 'The National Archives UK',
622                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
623                 'uploader_id': 'NationalArchives08',
624                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
625             },
626         },
627         # rtl.nl embed
628         {
629             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
630             'playlist_mincount': 5,
631             'info_dict': {
632                 'id': 'aanslagen-kopenhagen',
633                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
634             }
635         },
636         # Zapiks embed
637         {
638             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
639             'info_dict': {
640                 'id': '118046',
641                 'ext': 'mp4',
642                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
643             }
644         },
645         # Kaltura embed
646         {
647             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
648             'info_dict': {
649                 'id': '1_eergr3h1',
650                 'ext': 'mp4',
651                 'upload_date': '20150226',
652                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
653                 'timestamp': int,
654                 'title': 'John Carlson Postgame 2/25/15',
655             },
656         },
657         # Eagle.Platform embed (generic URL)
658         {
659             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
660             'info_dict': {
661                 'id': '227304',
662                 'ext': 'mp4',
663                 'title': 'Навальный вышел на свободу',
664                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
665                 'thumbnail': 're:^https?://.*\.jpg$',
666                 'duration': 87,
667                 'view_count': int,
668                 'age_limit': 0,
669             },
670         },
671         # ClipYou (Eagle.Platform) embed (custom URL)
672         {
673             'url': 'http://muz-tv.ru/play/7129/',
674             'info_dict': {
675                 'id': '12820',
676                 'ext': 'mp4',
677                 'title': "'O Sole Mio",
678                 'thumbnail': 're:^https?://.*\.jpg$',
679                 'duration': 216,
680                 'view_count': int,
681             },
682         },
683         # Pladform embed
684         {
685             'url': 'http://muz-tv.ru/kinozal/view/7400/',
686             'info_dict': {
687                 'id': '100183293',
688                 'ext': 'mp4',
689                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
690                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
691                 'thumbnail': 're:^https?://.*\.jpg$',
692                 'duration': 694,
693                 'age_limit': 0,
694             },
695         },
696         # Playwire embed
697         {
698             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
699             'info_dict': {
700                 'id': '3519514',
701                 'ext': 'mp4',
702                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
703                 'thumbnail': 're:^https?://.*\.png$',
704                 'duration': 45.115,
705             },
706         },
707         # 5min embed
708         {
709             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
710             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
711             'info_dict': {
712                 'id': '518726732',
713                 'ext': 'mp4',
714                 'title': 'Facebook Creates "On This Day" | Crunch Report',
715             },
716         },
717         # SVT embed
718         {
719             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
720             'info_dict': {
721                 'id': '2900353',
722                 'ext': 'flv',
723                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
724                 'duration': 27,
725                 'age_limit': 0,
726             },
727         },
728         # Crooks and Liars embed
729         {
730             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
731             'info_dict': {
732                 'id': '8RUoRhRi',
733                 'ext': 'mp4',
734                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
735                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
736                 'timestamp': 1428207000,
737                 'upload_date': '20150405',
738                 'uploader': 'Heather',
739             },
740         },
741         # Crooks and Liars external embed
742         {
743             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
744             'info_dict': {
745                 'id': 'MTE3MjUtMzQ2MzA',
746                 'ext': 'mp4',
747                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
748                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
749                 'timestamp': 1265032391,
750                 'upload_date': '20100201',
751                 'uploader': 'Heather',
752             },
753         },
754         # NBC Sports vplayer embed
755         {
756             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
757             'info_dict': {
758                 'id': 'ln7x1qSThw4k',
759                 'ext': 'flv',
760                 'title': "PFT Live: New leader in the 'new-look' defense",
761                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
762             },
763         },
764         # UDN embed
765         {
766             'url': 'http://www.udn.com/news/story/7314/822787',
767             'md5': 'fd2060e988c326991037b9aff9df21a6',
768             'info_dict': {
769                 'id': '300346',
770                 'ext': 'mp4',
771                 'title': '中一中男師變性 全校師生力挺',
772                 'thumbnail': 're:^https?://.*\.jpg$',
773             }
774         },
775         # Ooyala embed
776         {
777             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
778             'info_dict': {
779                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
780                 'ext': 'mp4',
781                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
782                 'title': 'This is what separates the Excel masters from the wannabes',
783             },
784             'params': {
785                 # m3u8 downloads
786                 'skip_download': True,
787             }
788         },
789         # Contains a SMIL manifest
790         {
791             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
792             'info_dict': {
793                 'id': 'file',
794                 'ext': 'flv',
795                 'title': '+ Football: Lottery Champions League Europe',
796                 'uploader': 'www.telewebion.com',
797             },
798             'params': {
799                 # rtmpe downloads
800                 'skip_download': True,
801             }
802         },
803         # Brightcove URL in single quotes
804         {
805             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
806             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
807             'info_dict': {
808                 'id': '4255764656001',
809                 'ext': 'mp4',
810                 'title': 'SN Presents: Russell Martin, World Citizen',
811                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
812                 'uploader': 'Rogers Sportsnet',
813             },
814         }
815     ]
816
817     def report_following_redirect(self, new_url):
818         """Report information extraction."""
819         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
820
821     def _extract_rss(self, url, video_id, doc):
822         playlist_title = doc.find('./channel/title').text
823         playlist_desc_el = doc.find('./channel/description')
824         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
825
826         entries = []
827         for it in doc.findall('./channel/item'):
828             next_url = xpath_text(it, 'link', fatal=False)
829             if not next_url:
830                 enclosure_nodes = it.findall('./enclosure')
831                 for e in enclosure_nodes:
832                     next_url = e.attrib.get('url')
833                     if next_url:
834                         break
835
836             if not next_url:
837                 continue
838
839             entries.append({
840                 '_type': 'url',
841                 'url': next_url,
842                 'title': it.find('title').text,
843             })
844
845         return {
846             '_type': 'playlist',
847             'id': url,
848             'title': playlist_title,
849             'description': playlist_desc,
850             'entries': entries,
851         }
852
853     def _extract_camtasia(self, url, video_id, webpage):
854         """ Returns None if no camtasia video can be found. """
855
856         camtasia_cfg = self._search_regex(
857             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
858             webpage, 'camtasia configuration file', default=None)
859         if camtasia_cfg is None:
860             return None
861
862         title = self._html_search_meta('DC.title', webpage, fatal=True)
863
864         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
865         camtasia_cfg = self._download_xml(
866             camtasia_url, video_id,
867             note='Downloading camtasia configuration',
868             errnote='Failed to download camtasia configuration')
869         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
870
871         entries = []
872         for n in fileset_node.getchildren():
873             url_n = n.find('./uri')
874             if url_n is None:
875                 continue
876
877             entries.append({
878                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
879                 'title': '%s - %s' % (title, n.tag),
880                 'url': compat_urlparse.urljoin(url, url_n.text),
881                 'duration': float_or_none(n.find('./duration').text),
882             })
883
884         return {
885             '_type': 'playlist',
886             'entries': entries,
887             'title': title,
888         }
889
890     def _real_extract(self, url):
891         if url.startswith('//'):
892             return {
893                 '_type': 'url',
894                 'url': self.http_scheme() + url,
895             }
896
897         parsed_url = compat_urlparse.urlparse(url)
898         if not parsed_url.scheme:
899             default_search = self._downloader.params.get('default_search')
900             if default_search is None:
901                 default_search = 'fixup_error'
902
903             if default_search in ('auto', 'auto_warning', 'fixup_error'):
904                 if '/' in url:
905                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
906                     return self.url_result('http://' + url)
907                 elif default_search != 'fixup_error':
908                     if default_search == 'auto_warning':
909                         if re.match(r'^(?:url|URL)$', url):
910                             raise ExtractorError(
911                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
912                                 expected=True)
913                         else:
914                             self._downloader.report_warning(
915                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
916                     return self.url_result('ytsearch:' + url)
917
918             if default_search in ('error', 'fixup_error'):
919                 raise ExtractorError(
920                     '%r is not a valid URL. '
921                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
922                     % (url, url), expected=True)
923             else:
924                 if ':' not in default_search:
925                     default_search += ':'
926                 return self.url_result(default_search + url)
927
928         url, smuggled_data = unsmuggle_url(url)
929         force_videoid = None
930         is_intentional = smuggled_data and smuggled_data.get('to_generic')
931         if smuggled_data and 'force_videoid' in smuggled_data:
932             force_videoid = smuggled_data['force_videoid']
933             video_id = force_videoid
934         else:
935             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
936
937         self.to_screen('%s: Requesting header' % video_id)
938
939         head_req = HEADRequest(url)
940         head_response = self._request_webpage(
941             head_req, video_id,
942             note=False, errnote='Could not send HEAD request to %s' % url,
943             fatal=False)
944
945         if head_response is not False:
946             # Check for redirect
947             new_url = head_response.geturl()
948             if url != new_url:
949                 self.report_following_redirect(new_url)
950                 if force_videoid:
951                     new_url = smuggle_url(
952                         new_url, {'force_videoid': force_videoid})
953                 return self.url_result(new_url)
954
955         full_response = None
956         if head_response is False:
957             request = compat_urllib_request.Request(url)
958             request.add_header('Accept-Encoding', '*')
959             full_response = self._request_webpage(request, video_id)
960             head_response = full_response
961
962         # Check for direct link to a video
963         content_type = head_response.headers.get('Content-Type', '')
964         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
965         if m:
966             upload_date = unified_strdate(
967                 head_response.headers.get('Last-Modified'))
968             return {
969                 'id': video_id,
970                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
971                 'direct': True,
972                 'formats': [{
973                     'format_id': m.group('format_id'),
974                     'url': url,
975                     'vcodec': 'none' if m.group('type') == 'audio' else None
976                 }],
977                 'upload_date': upload_date,
978             }
979
980         if not self._downloader.params.get('test', False) and not is_intentional:
981             self._downloader.report_warning('Falling back on generic information extractor.')
982
983         if not full_response:
984             request = compat_urllib_request.Request(url)
985             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
986             # making it impossible to download only chunk of the file (yet we need only 512kB to
987             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
988             # that will always result in downloading the whole file that is not desirable.
989             # Therefore for extraction pass we have to override Accept-Encoding to any in order
990             # to accept raw bytes and being able to download only a chunk.
991             # It may probably better to solve this by checking Content-Type for application/octet-stream
992             # after HEAD request finishes, but not sure if we can rely on this.
993             request.add_header('Accept-Encoding', '*')
994             full_response = self._request_webpage(request, video_id)
995
996         # Maybe it's a direct link to a video?
997         # Be careful not to download the whole thing!
998         first_bytes = full_response.read(512)
999         if not is_html(first_bytes):
1000             self._downloader.report_warning(
1001                 'URL could be a direct video link, returning it as such.')
1002             upload_date = unified_strdate(
1003                 head_response.headers.get('Last-Modified'))
1004             return {
1005                 'id': video_id,
1006                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1007                 'direct': True,
1008                 'url': url,
1009                 'upload_date': upload_date,
1010             }
1011
1012         webpage = self._webpage_read_content(
1013             full_response, url, video_id, prefix=first_bytes)
1014
1015         self.report_extraction(video_id)
1016
1017         # Is it an RSS feed?
1018         try:
1019             doc = parse_xml(webpage)
1020             if doc.tag == 'rss':
1021                 return self._extract_rss(url, video_id, doc)
1022         except compat_xml_parse_error:
1023             pass
1024
1025         # Is it a Camtasia project?
1026         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1027         if camtasia_res is not None:
1028             return camtasia_res
1029
1030         # Sometimes embedded video player is hidden behind percent encoding
1031         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1032         # Unescaping the whole page allows to handle those cases in a generic way
1033         webpage = compat_urllib_parse.unquote(webpage)
1034
1035         # it's tempting to parse this further, but you would
1036         # have to take into account all the variations like
1037         #   Video Title - Site Name
1038         #   Site Name | Video Title
1039         #   Video Title - Tagline | Site Name
1040         # and so on and so forth; it's just not practical
1041         video_title = self._html_search_regex(
1042             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1043             default='video')
1044
1045         # Try to detect age limit automatically
1046         age_limit = self._rta_search(webpage)
1047         # And then there are the jokers who advertise that they use RTA,
1048         # but actually don't.
1049         AGE_LIMIT_MARKERS = [
1050             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1051         ]
1052         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1053             age_limit = 18
1054
1055         # video uploader is domain name
1056         video_uploader = self._search_regex(
1057             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1058
1059         # Helper method
1060         def _playlist_from_matches(matches, getter=None, ie=None):
1061             urlrs = orderedSet(
1062                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1063                 for m in matches)
1064             return self.playlist_result(
1065                 urlrs, playlist_id=video_id, playlist_title=video_title)
1066
1067         # Look for BrightCove:
1068         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1069         if bc_urls:
1070             self.to_screen('Brightcove video detected.')
1071             entries = [{
1072                 '_type': 'url',
1073                 'url': smuggle_url(bc_url, {'Referer': url}),
1074                 'ie_key': 'Brightcove'
1075             } for bc_url in bc_urls]
1076
1077             return {
1078                 '_type': 'playlist',
1079                 'title': video_title,
1080                 'id': video_id,
1081                 'entries': entries,
1082             }
1083
1084         # Look for embedded rtl.nl player
1085         matches = re.findall(
1086             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1087             webpage)
1088         if matches:
1089             return _playlist_from_matches(matches, ie='RtlNl')
1090
1091         # Look for embedded (iframe) Vimeo player
1092         mobj = re.search(
1093             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
1094         if mobj:
1095             player_url = unescapeHTML(mobj.group('url'))
1096             surl = smuggle_url(player_url, {'Referer': url})
1097             return self.url_result(surl)
1098         # Look for embedded (swf embed) Vimeo player
1099         mobj = re.search(
1100             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1101         if mobj:
1102             return self.url_result(mobj.group(1))
1103
1104         # Look for embedded YouTube player
1105         matches = re.findall(r'''(?x)
1106             (?:
1107                 <iframe[^>]+?src=|
1108                 data-video-url=|
1109                 <embed[^>]+?src=|
1110                 embedSWF\(?:\s*|
1111                 new\s+SWFObject\(
1112             )
1113             (["\'])
1114                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1115                 (?:embed|v|p)/.+?)
1116             \1''', webpage)
1117         if matches:
1118             return _playlist_from_matches(
1119                 matches, lambda m: unescapeHTML(m[1]))
1120
1121         # Look for lazyYT YouTube embed
1122         matches = re.findall(
1123             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1124         if matches:
1125             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1126
1127         # Look for embedded Dailymotion player
1128         matches = re.findall(
1129             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1130         if matches:
1131             return _playlist_from_matches(
1132                 matches, lambda m: unescapeHTML(m[1]))
1133
1134         # Look for embedded Dailymotion playlist player (#3822)
1135         m = re.search(
1136             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1137         if m:
1138             playlists = re.findall(
1139                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1140             if playlists:
1141                 return _playlist_from_matches(
1142                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1143
1144         # Look for embedded Wistia player
1145         match = re.search(
1146             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1147         if match:
1148             embed_url = self._proto_relative_url(
1149                 unescapeHTML(match.group('url')))
1150             return {
1151                 '_type': 'url_transparent',
1152                 'url': embed_url,
1153                 'ie_key': 'Wistia',
1154                 'uploader': video_uploader,
1155                 'title': video_title,
1156                 'id': video_id,
1157             }
1158
1159         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1160         if match:
1161             return {
1162                 '_type': 'url_transparent',
1163                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1164                 'ie_key': 'Wistia',
1165                 'uploader': video_uploader,
1166                 'title': video_title,
1167                 'id': match.group('id')
1168             }
1169
1170         # Look for embedded blip.tv player
1171         bliptv_url = BlipTVIE._extract_url(webpage)
1172         if bliptv_url:
1173             return self.url_result(bliptv_url, 'BlipTV')
1174
1175         # Look for SVT player
1176         svt_url = SVTIE._extract_url(webpage)
1177         if svt_url:
1178             return self.url_result(svt_url, 'SVT')
1179
1180         # Look for embedded condenast player
1181         matches = re.findall(
1182             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1183             webpage)
1184         if matches:
1185             return {
1186                 '_type': 'playlist',
1187                 'entries': [{
1188                     '_type': 'url',
1189                     'ie_key': 'CondeNast',
1190                     'url': ma,
1191                 } for ma in matches],
1192                 'title': video_title,
1193                 'id': video_id,
1194             }
1195
1196         # Look for Bandcamp pages with custom domain
1197         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1198         if mobj is not None:
1199             burl = unescapeHTML(mobj.group(1))
1200             # Don't set the extractor because it can be a track url or an album
1201             return self.url_result(burl)
1202
1203         # Look for embedded Vevo player
1204         mobj = re.search(
1205             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1206         if mobj is not None:
1207             return self.url_result(mobj.group('url'))
1208
1209         # Look for embedded Viddler player
1210         mobj = re.search(
1211             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1212             webpage)
1213         if mobj is not None:
1214             return self.url_result(mobj.group('url'))
1215
1216         # Look for NYTimes player
1217         mobj = re.search(
1218             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1219             webpage)
1220         if mobj is not None:
1221             return self.url_result(mobj.group('url'))
1222
1223         # Look for Libsyn player
1224         mobj = re.search(
1225             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1226         if mobj is not None:
1227             return self.url_result(mobj.group('url'))
1228
1229         # Look for Ooyala videos
1230         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1231                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1232                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1233                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1234         if mobj is not None:
1235             return OoyalaIE._build_url_result(mobj.group('ec'))
1236
1237         # Look for multiple Ooyala embeds on SBN network websites
1238         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1239         if mobj is not None:
1240             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1241             if embeds:
1242                 return _playlist_from_matches(
1243                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1244
1245         # Look for Aparat videos
1246         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1247         if mobj is not None:
1248             return self.url_result(mobj.group(1), 'Aparat')
1249
1250         # Look for MPORA videos
1251         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1252         if mobj is not None:
1253             return self.url_result(mobj.group(1), 'Mpora')
1254
1255         # Look for embedded NovaMov-based player
1256         mobj = re.search(
1257             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1258                     (?P<url>http://(?:(?:embed|www)\.)?
1259                         (?:novamov\.com|
1260                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1261                            videoweed\.(?:es|com)|
1262                            movshare\.(?:net|sx|ag)|
1263                            divxstage\.(?:eu|net|ch|co|at|ag))
1264                         /embed\.php.+?)\1''', webpage)
1265         if mobj is not None:
1266             return self.url_result(mobj.group('url'))
1267
1268         # Look for embedded Facebook player
1269         mobj = re.search(
1270             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1271         if mobj is not None:
1272             return self.url_result(mobj.group('url'), 'Facebook')
1273
1274         # Look for embedded VK player
1275         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1276         if mobj is not None:
1277             return self.url_result(mobj.group('url'), 'VK')
1278
1279         # Look for embedded ivi player
1280         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1281         if mobj is not None:
1282             return self.url_result(mobj.group('url'), 'Ivi')
1283
1284         # Look for embedded Huffington Post player
1285         mobj = re.search(
1286             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1287         if mobj is not None:
1288             return self.url_result(mobj.group('url'), 'HuffPost')
1289
1290         # Look for embed.ly
1291         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1292         if mobj is not None:
1293             return self.url_result(mobj.group('url'))
1294         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1295         if mobj is not None:
1296             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1297
1298         # Look for funnyordie embed
1299         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1300         if matches:
1301             return _playlist_from_matches(
1302                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1303
1304         # Look for BBC iPlayer embed
1305         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1306         if matches:
1307             return _playlist_from_matches(matches, ie='BBCCoUk')
1308
1309         # Look for embedded RUTV player
1310         rutv_url = RUTVIE._extract_url(webpage)
1311         if rutv_url:
1312             return self.url_result(rutv_url, 'RUTV')
1313
1314         # Look for embedded TVC player
1315         rutv_url = TVCIE._extract_url(webpage)
1316         if rutv_url:
1317             return self.url_result(rutv_url, 'TVC')
1318
1319         # Look for embedded SportBox player
1320         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1321         if sportbox_urls:
1322             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1323
1324         # Look for embedded TED player
1325         mobj = re.search(
1326             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1327         if mobj is not None:
1328             return self.url_result(mobj.group('url'), 'TED')
1329
1330         # Look for embedded Ustream videos
1331         mobj = re.search(
1332             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1333         if mobj is not None:
1334             return self.url_result(mobj.group('url'), 'Ustream')
1335
1336         # Look for embedded arte.tv player
1337         mobj = re.search(
1338             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1339             webpage)
1340         if mobj is not None:
1341             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1342
1343         # Look for embedded smotri.com player
1344         smotri_url = SmotriIE._extract_url(webpage)
1345         if smotri_url:
1346             return self.url_result(smotri_url, 'Smotri')
1347
1348         # Look for embeded soundcloud player
1349         mobj = re.search(
1350             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1351             webpage)
1352         if mobj is not None:
1353             url = unescapeHTML(mobj.group('url'))
1354             return self.url_result(url)
1355
1356         # Look for embedded vulture.com player
1357         mobj = re.search(
1358             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1359             webpage)
1360         if mobj is not None:
1361             url = unescapeHTML(mobj.group('url'))
1362             return self.url_result(url, ie='Vulture')
1363
1364         # Look for embedded mtvservices player
1365         mobj = re.search(
1366             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1367             webpage)
1368         if mobj is not None:
1369             url = unescapeHTML(mobj.group('url'))
1370             return self.url_result(url, ie='MTVServicesEmbedded')
1371
1372         # Look for embedded yahoo player
1373         mobj = re.search(
1374             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1375             webpage)
1376         if mobj is not None:
1377             return self.url_result(mobj.group('url'), 'Yahoo')
1378
1379         # Look for embedded sbs.com.au player
1380         mobj = re.search(
1381             r'''(?x)
1382             (?:
1383                 <meta\s+property="og:video"\s+content=|
1384                 <iframe[^>]+?src=
1385             )
1386             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1387             webpage)
1388         if mobj is not None:
1389             return self.url_result(mobj.group('url'), 'SBS')
1390
1391         # Look for embedded Cinchcast player
1392         mobj = re.search(
1393             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1394             webpage)
1395         if mobj is not None:
1396             return self.url_result(mobj.group('url'), 'Cinchcast')
1397
1398         mobj = re.search(
1399             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1400             webpage)
1401         if not mobj:
1402             mobj = re.search(
1403                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1404                 webpage)
1405         if mobj is not None:
1406             return self.url_result(mobj.group('url'), 'MLB')
1407
1408         mobj = re.search(
1409             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1410             webpage)
1411         if mobj is not None:
1412             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1413
1414         mobj = re.search(
1415             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1416             webpage)
1417         if mobj is not None:
1418             return self.url_result(mobj.group('url'), 'Livestream')
1419
1420         # Look for Zapiks embed
1421         mobj = re.search(
1422             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1423         if mobj is not None:
1424             return self.url_result(mobj.group('url'), 'Zapiks')
1425
1426         # Look for Kaltura embeds
1427         mobj = re.search(
1428             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1429         if mobj is not None:
1430             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1431
1432         # Look for Eagle.Platform embeds
1433         mobj = re.search(
1434             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1435         if mobj is not None:
1436             return self.url_result(mobj.group('url'), 'EaglePlatform')
1437
1438         # Look for ClipYou (uses Eagle.Platform) embeds
1439         mobj = re.search(
1440             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1441         if mobj is not None:
1442             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1443
1444         # Look for Pladform embeds
1445         mobj = re.search(
1446             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1447         if mobj is not None:
1448             return self.url_result(mobj.group('url'), 'Pladform')
1449
1450         # Look for Playwire embeds
1451         mobj = re.search(
1452             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1453         if mobj is not None:
1454             return self.url_result(mobj.group('url'))
1455
1456         # Look for 5min embeds
1457         mobj = re.search(
1458             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1459         if mobj is not None:
1460             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1461
1462         # Look for Crooks and Liars embeds
1463         mobj = re.search(
1464             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1465         if mobj is not None:
1466             return self.url_result(mobj.group('url'))
1467
1468         # Look for NBC Sports VPlayer embeds
1469         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1470         if nbc_sports_url:
1471             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1472
1473         # Look for UDN embeds
1474         mobj = re.search(
1475             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1476         if mobj is not None:
1477             return self.url_result(
1478                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1479
1480         # Look for Senate ISVP iframe
1481         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1482         if senate_isvp_url:
1483             return self.url_result(senate_isvp_url, 'SenateISVP')
1484
1485         def check_video(vurl):
1486             if YoutubeIE.suitable(vurl):
1487                 return True
1488             vpath = compat_urlparse.urlparse(vurl).path
1489             vext = determine_ext(vpath)
1490             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1491
1492         def filter_video(urls):
1493             return list(filter(check_video, urls))
1494
1495         # Start with something easy: JW Player in SWFObject
1496         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1497         if not found:
1498             # Look for gorilla-vid style embedding
1499             found = filter_video(re.findall(r'''(?sx)
1500                 (?:
1501                     jw_plugins|
1502                     JWPlayerOptions|
1503                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1504                 )
1505                 .*?
1506                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1507         if not found:
1508             # Broaden the search a little bit
1509             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1510         if not found:
1511             # Broaden the findall a little bit: JWPlayer JS loader
1512             found = filter_video(re.findall(
1513                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1514         if not found:
1515             # Flow player
1516             found = filter_video(re.findall(r'''(?xs)
1517                 flowplayer\("[^"]+",\s*
1518                     \{[^}]+?\}\s*,
1519                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1520                         ["']?url["']?\s*:\s*["']([^"']+)["']
1521             ''', webpage))
1522         if not found:
1523             # Cinerama player
1524             found = re.findall(
1525                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1526         if not found:
1527             # Try to find twitter cards info
1528             found = filter_video(re.findall(
1529                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1530         if not found:
1531             # We look for Open Graph info:
1532             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
1533             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1534             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1535             if m_video_type is not None:
1536                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1537         if not found:
1538             # HTML5 video
1539             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1540         if not found:
1541             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1542             found = re.search(
1543                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1544                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1545                 webpage)
1546             if not found:
1547                 # Look also in Refresh HTTP header
1548                 refresh_header = head_response.headers.get('Refresh')
1549                 if refresh_header:
1550                     found = re.search(REDIRECT_REGEX, refresh_header)
1551             if found:
1552                 new_url = compat_urlparse.urljoin(url, found.group(1))
1553                 self.report_following_redirect(new_url)
1554                 return {
1555                     '_type': 'url',
1556                     'url': new_url,
1557                 }
1558         if not found:
1559             raise UnsupportedError(url)
1560
1561         entries = []
1562         for video_url in found:
1563             video_url = compat_urlparse.urljoin(url, video_url)
1564             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1565
1566             # Sometimes, jwplayer extraction will result in a YouTube URL
1567             if YoutubeIE.suitable(video_url):
1568                 entries.append(self.url_result(video_url, 'Youtube'))
1569                 continue
1570
1571             # here's a fun little line of code for you:
1572             video_id = os.path.splitext(video_id)[0]
1573
1574             if determine_ext(video_url) == 'smil':
1575                 entries.append({
1576                     'id': video_id,
1577                     'formats': self._extract_smil_formats(video_url, video_id),
1578                     'uploader': video_uploader,
1579                     'title': video_title,
1580                     'age_limit': age_limit,
1581                 })
1582             else:
1583                 entries.append({
1584                     'id': video_id,
1585                     'url': video_url,
1586                     'uploader': video_uploader,
1587                     'title': video_title,
1588                     'age_limit': age_limit,
1589                 })
1590
1591         if len(entries) == 1:
1592             return entries[0]
1593         else:
1594             for num, e in enumerate(entries, start=1):
1595                 # 'url' results don't have a title
1596                 if e.get('title') is not None:
1597                     e['title'] = '%s (%d)' % (e['title'], num)
1598             return {
1599                 '_type': 'playlist',
1600                 'entries': entries,
1601             }