[extractor/generic] Add support for snagfilms embeds
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7
8 from .common import InfoExtractor
9 from .youtube import YoutubeIE
10 from ..compat import (
11     compat_urllib_parse,
12     compat_urllib_parse_unquote,
13     compat_urllib_request,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     parse_xml,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import BrightcoveIE
34 from .nbc import NBCSportsVPlayerIE
35 from .ooyala import OoyalaIE
36 from .rutv import RUTVIE
37 from .tvc import TVCIE
38 from .sportbox import SportBoxEmbedIE
39 from .smotri import SmotriIE
40 from .condenast import CondeNastIE
41 from .udn import UDNEmbedIE
42 from .senateisvp import SenateISVPIE
43 from .bliptv import BlipTVIE
44 from .svt import SVTIE
45 from .pornhub import PornHubIE
46 from .xhamster import XHamsterEmbedIE
47 from .vimeo import VimeoIE
48 from .dailymotion import DailymotionCloudIE
49 from .onionstudios import OnionStudiosIE
50 from .snagfilms import SnagFilmsEmbedIE
51
52
53 class GenericIE(InfoExtractor):
54     IE_DESC = 'Generic downloader that works on some sites'
55     _VALID_URL = r'.*'
56     IE_NAME = 'generic'
57     _TESTS = [
58         # Direct link to a video
59         {
60             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
61             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
62             'info_dict': {
63                 'id': 'trailer',
64                 'ext': 'mp4',
65                 'title': 'trailer',
66                 'upload_date': '20100513',
67             }
68         },
69         # Direct link to media delivered compressed (until Accept-Encoding is *)
70         {
71             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
72             'md5': '128c42e68b13950268b648275386fc74',
73             'info_dict': {
74                 'id': 'FictionJunction-Parallel_Hearts',
75                 'ext': 'flac',
76                 'title': 'FictionJunction-Parallel_Hearts',
77                 'upload_date': '20140522',
78             },
79             'expected_warnings': [
80                 'URL could be a direct video link, returning it as such.'
81             ]
82         },
83         # Direct download with broken HEAD
84         {
85             'url': 'http://ai-radio.org:8000/radio.opus',
86             'info_dict': {
87                 'id': 'radio',
88                 'ext': 'opus',
89                 'title': 'radio',
90             },
91             'params': {
92                 'skip_download': True,  # infinite live stream
93             },
94             'expected_warnings': [
95                 r'501.*Not Implemented'
96             ],
97         },
98         # Direct link with incorrect MIME type
99         {
100             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
101             'md5': '4ccbebe5f36706d85221f204d7eb5913',
102             'info_dict': {
103                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
104                 'id': '5_Lennart_Poettering_-_Systemd',
105                 'ext': 'webm',
106                 'title': '5_Lennart_Poettering_-_Systemd',
107                 'upload_date': '20141120',
108             },
109             'expected_warnings': [
110                 'URL could be a direct video link, returning it as such.'
111             ]
112         },
113         # RSS feed
114         {
115             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
116             'info_dict': {
117                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
118                 'title': 'Zero Punctuation',
119                 'description': 're:.*groundbreaking video review series.*'
120             },
121             'playlist_mincount': 11,
122         },
123         # RSS feed with enclosure
124         {
125             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
126             'info_dict': {
127                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
128                 'ext': 'm4v',
129                 'upload_date': '20150228',
130                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
131             }
132         },
133         # google redirect
134         {
135             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
136             'info_dict': {
137                 'id': 'cmQHVoWB5FY',
138                 'ext': 'mp4',
139                 'upload_date': '20130224',
140                 'uploader_id': 'TheVerge',
141                 'description': 're:^Chris Ziegler takes a look at the\.*',
142                 'uploader': 'The Verge',
143                 'title': 'First Firefox OS phones side-by-side',
144             },
145             'params': {
146                 'skip_download': False,
147             }
148         },
149         {
150             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
151             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
152             'info_dict': {
153                 'id': '13601338388002',
154                 'ext': 'mp4',
155                 'uploader': 'www.hodiho.fr',
156                 'title': 'R\u00e9gis plante sa Jeep',
157             }
158         },
159         # bandcamp page with custom domain
160         {
161             'add_ie': ['Bandcamp'],
162             'url': 'http://bronyrock.com/track/the-pony-mash',
163             'info_dict': {
164                 'id': '3235767654',
165                 'ext': 'mp3',
166                 'title': 'The Pony Mash',
167                 'uploader': 'M_Pallante',
168             },
169             'skip': 'There is a limit of 200 free downloads / month for the test song',
170         },
171         # embedded brightcove video
172         # it also tests brightcove videos that need to set the 'Referer' in the
173         # http requests
174         {
175             'add_ie': ['Brightcove'],
176             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
177             'info_dict': {
178                 'id': '2765128793001',
179                 'ext': 'mp4',
180                 'title': 'Le cours de bourse : l’analyse technique',
181                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
182                 'uploader': 'BFM BUSINESS',
183             },
184             'params': {
185                 'skip_download': True,
186             },
187         },
188         {
189             # https://github.com/rg3/youtube-dl/issues/2253
190             'url': 'http://bcove.me/i6nfkrc3',
191             'md5': '0ba9446db037002366bab3b3eb30c88c',
192             'info_dict': {
193                 'id': '3101154703001',
194                 'ext': 'mp4',
195                 'title': 'Still no power',
196                 'uploader': 'thestar.com',
197                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
198             },
199             'add_ie': ['Brightcove'],
200         },
201         {
202             'url': 'http://www.championat.com/video/football/v/87/87499.html',
203             'md5': 'fb973ecf6e4a78a67453647444222983',
204             'info_dict': {
205                 'id': '3414141473001',
206                 'ext': 'mp4',
207                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
208                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
209                 'uploader': 'Championat',
210             },
211         },
212         {
213             # https://github.com/rg3/youtube-dl/issues/3541
214             'add_ie': ['Brightcove'],
215             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
216             'info_dict': {
217                 'id': '3866516442001',
218                 'ext': 'mp4',
219                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
220                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
221                 'uploader': 'SBS Broadcasting',
222             },
223             'skip': 'Restricted to Netherlands',
224             'params': {
225                 'skip_download': True,  # m3u8 download
226             },
227         },
228         # ooyala video
229         {
230             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
231             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
232             'info_dict': {
233                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
234                 'ext': 'mp4',
235                 'title': '2cc213299525360.mov',  # that's what we get
236             },
237             'add_ie': ['Ooyala'],
238         },
239         # multiple ooyala embeds on SBN network websites
240         {
241             'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
242             'info_dict': {
243                 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
244                 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
245             },
246             'playlist_mincount': 3,
247             'params': {
248                 'skip_download': True,
249             },
250             'add_ie': ['Ooyala'],
251         },
252         # embed.ly video
253         {
254             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
255             'info_dict': {
256                 'id': '9ODmcdjQcHQ',
257                 'ext': 'mp4',
258                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
259                 'upload_date': '20140225',
260                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
261                 'uploader': 'Tested',
262                 'uploader_id': 'testedcom',
263             },
264             # No need to test YoutubeIE here
265             'params': {
266                 'skip_download': True,
267             },
268         },
269         # funnyordie embed
270         {
271             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
272             'info_dict': {
273                 'id': '18e820ec3f',
274                 'ext': 'mp4',
275                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
276                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
277             },
278         },
279         # BBC iPlayer embeds
280         {
281             'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
282             'info_dict': {
283                 'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
284             },
285             'playlist_mincount': 18,
286         },
287         # RUTV embed
288         {
289             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
290             'info_dict': {
291                 'id': '776940',
292                 'ext': 'mp4',
293                 'title': 'Охотское море стало целиком российским',
294                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
295             },
296             'params': {
297                 # m3u8 download
298                 'skip_download': True,
299             },
300         },
301         # TVC embed
302         {
303             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
304             'info_dict': {
305                 'id': '55304',
306                 'ext': 'mp4',
307                 'title': 'Дошкольное воспитание',
308             },
309         },
310         # SportBox embed
311         {
312             'url': 'http://www.vestifinance.ru/articles/25753',
313             'info_dict': {
314                 'id': '25753',
315                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
316             },
317             'playlist': [{
318                 'info_dict': {
319                     'id': '370908',
320                     'title': 'Госзаказ. День 3',
321                     'ext': 'mp4',
322                 }
323             }, {
324                 'info_dict': {
325                     'id': '370905',
326                     'title': 'Госзаказ. День 2',
327                     'ext': 'mp4',
328                 }
329             }, {
330                 'info_dict': {
331                     'id': '370902',
332                     'title': 'Госзаказ. День 1',
333                     'ext': 'mp4',
334                 }
335             }],
336             'params': {
337                 # m3u8 download
338                 'skip_download': True,
339             },
340         },
341         # XHamster embed
342         {
343             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
344             'info_dict': {
345                 'id': 'showthread',
346                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
347             },
348             'playlist_mincount': 7,
349         },
350         # Embedded TED video
351         {
352             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
353             'md5': '65fdff94098e4a607385a60c5177c638',
354             'info_dict': {
355                 'id': '1969',
356                 'ext': 'mp4',
357                 'title': 'Hidden miracles of the natural world',
358                 'uploader': 'Louie Schwartzberg',
359                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
360             }
361         },
362         # Embedded Ustream video
363         {
364             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
365             'md5': '27b99cdb639c9b12a79bca876a073417',
366             'info_dict': {
367                 'id': '45734260',
368                 'ext': 'flv',
369                 'uploader': 'AU SPA:  The NSA and Privacy',
370                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
371             }
372         },
373         # nowvideo embed hidden behind percent encoding
374         {
375             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
376             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
377             'info_dict': {
378                 'id': '06e53103ca9aa',
379                 'ext': 'flv',
380                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
381                 'description': 'No description',
382             },
383         },
384         # arte embed
385         {
386             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
387             'md5': '7653032cbb25bf6c80d80f217055fa43',
388             'info_dict': {
389                 'id': '048195-004_PLUS7-F',
390                 'ext': 'flv',
391                 'title': 'X:enius',
392                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
393                 'upload_date': '20140320',
394             },
395             'params': {
396                 'skip_download': 'Requires rtmpdump'
397             }
398         },
399         # Condé Nast embed
400         {
401             'url': 'http://www.wired.com/2014/04/honda-asimo/',
402             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
403             'info_dict': {
404                 'id': '53501be369702d3275860000',
405                 'ext': 'mp4',
406                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
407             }
408         },
409         # Dailymotion embed
410         {
411             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
412             'md5': '441aeeb82eb72c422c7f14ec533999cd',
413             'info_dict': {
414                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
415                 'ext': 'mp4',
416                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
417                 'uploader': 'Spi0n',
418             },
419             'add_ie': ['Dailymotion'],
420         },
421         # YouTube embed
422         {
423             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
424             'info_dict': {
425                 'id': 'FXRb4ykk4S0',
426                 'ext': 'mp4',
427                 'title': 'The NBL Auction 2014',
428                 'uploader': 'BADMINTON England',
429                 'uploader_id': 'BADMINTONEvents',
430                 'upload_date': '20140603',
431                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
432             },
433             'add_ie': ['Youtube'],
434             'params': {
435                 'skip_download': True,
436             }
437         },
438         # MTVServices embed
439         {
440             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
441             'md5': '35727f82f58c76d996fc188f9755b0d5',
442             'info_dict': {
443                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
444                 'ext': 'mp4',
445                 'title': 'Review',
446                 'description': 'Mario\'s life in the fast lane has never looked so good.',
447             },
448         },
449         # YouTube embed via <data-embed-url="">
450         {
451             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
452             'info_dict': {
453                 'id': '4vAffPZIT44',
454                 'ext': 'mp4',
455                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
456                 'uploader': 'Gameloft',
457                 'uploader_id': 'gameloft',
458                 'upload_date': '20140828',
459                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
460             },
461             'params': {
462                 'skip_download': True,
463             }
464         },
465         # Camtasia studio
466         {
467             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
468             'playlist': [{
469                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
470                 'info_dict': {
471                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
472                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
473                     'ext': 'flv',
474                     'duration': 2235.90,
475                 }
476             }, {
477                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
478                 'info_dict': {
479                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
480                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
481                     'ext': 'flv',
482                     'duration': 2235.93,
483                 }
484             }],
485             'info_dict': {
486                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
487             }
488         },
489         # Flowplayer
490         {
491             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
492             'md5': '9d65602bf31c6e20014319c7d07fba27',
493             'info_dict': {
494                 'id': '5123ea6d5e5a7',
495                 'ext': 'mp4',
496                 'age_limit': 18,
497                 'uploader': 'www.handjobhub.com',
498                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
499             }
500         },
501         # Multiple brightcove videos
502         # https://github.com/rg3/youtube-dl/issues/2283
503         {
504             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
505             'info_dict': {
506                 'id': 'always-never',
507                 'title': 'Always / Never - The New Yorker',
508             },
509             'playlist_count': 3,
510             'params': {
511                 'extract_flat': False,
512                 'skip_download': True,
513             }
514         },
515         # MLB embed
516         {
517             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
518             'md5': '96f09a37e44da40dd083e12d9a683327',
519             'info_dict': {
520                 'id': '33322633',
521                 'ext': 'mp4',
522                 'title': 'Ump changes call to ball',
523                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
524                 'duration': 48,
525                 'timestamp': 1401537900,
526                 'upload_date': '20140531',
527                 'thumbnail': 're:^https?://.*\.jpg$',
528             },
529         },
530         # Wistia embed
531         {
532             'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
533             'md5': '8788b683c777a5cf25621eaf286d0c23',
534             'info_dict': {
535                 'id': '1cfaf6b7ea',
536                 'ext': 'mov',
537                 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
538                 'duration': 643.0,
539                 'filesize': 182808282,
540                 'uploader': 'education-portal.com',
541             },
542         },
543         {
544             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
545             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
546             'info_dict': {
547                 'id': 'uxjb0lwrcz',
548                 'ext': 'mp4',
549                 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
550                 'duration': 1715.0,
551                 'uploader': 'thoughtworks.wistia.com',
552             },
553         },
554         # Soundcloud embed
555         {
556             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
557             'info_dict': {
558                 'id': '174391317',
559                 'ext': 'mp3',
560                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
561                 'uploader': 'Sophos Security',
562                 'title': 'Chet Chat 171 - Oct 29, 2014',
563                 'upload_date': '20141029',
564             }
565         },
566         # Livestream embed
567         {
568             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
569             'info_dict': {
570                 'id': '67864563',
571                 'ext': 'flv',
572                 'upload_date': '20141112',
573                 'title': 'Rosetta #CometLanding webcast HL 10',
574             }
575         },
576         # LazyYT
577         {
578             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
579             'info_dict': {
580                 'id': '1986',
581                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
582             },
583             'playlist_mincount': 2,
584         },
585         # Cinchcast embed
586         {
587             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
588             'info_dict': {
589                 'id': '7141703',
590                 'ext': 'mp3',
591                 'upload_date': '20141126',
592                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
593             }
594         },
595         # Cinerama player
596         {
597             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
598             'info_dict': {
599                 'id': '730m_DandD_1901_512k',
600                 'ext': 'mp4',
601                 'uploader': 'www.abc.net.au',
602                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
603             }
604         },
605         # embedded viddler video
606         {
607             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
608             'info_dict': {
609                 'id': '4d03aad9',
610                 'ext': 'mp4',
611                 'uploader': 'deadspin',
612                 'title': 'WALL-TO-GORTAT',
613                 'timestamp': 1422285291,
614                 'upload_date': '20150126',
615             },
616             'add_ie': ['Viddler'],
617         },
618         # Libsyn embed
619         {
620             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
621             'info_dict': {
622                 'id': '3377616',
623                 'ext': 'mp3',
624                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
625                 'description': 'md5:601cb790edd05908957dae8aaa866465',
626                 'upload_date': '20150220',
627             },
628         },
629         # jwplayer YouTube
630         {
631             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
632             'info_dict': {
633                 'id': 'Mrj4DVp2zeA',
634                 'ext': 'mp4',
635                 'upload_date': '20150212',
636                 'uploader': 'The National Archives UK',
637                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
638                 'uploader_id': 'NationalArchives08',
639                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
640             },
641         },
642         # rtl.nl embed
643         {
644             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
645             'playlist_mincount': 5,
646             'info_dict': {
647                 'id': 'aanslagen-kopenhagen',
648                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
649             }
650         },
651         # Zapiks embed
652         {
653             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
654             'info_dict': {
655                 'id': '118046',
656                 'ext': 'mp4',
657                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
658             }
659         },
660         # Kaltura embed
661         {
662             'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
663             'info_dict': {
664                 'id': '1_eergr3h1',
665                 'ext': 'mp4',
666                 'upload_date': '20150226',
667                 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
668                 'timestamp': int,
669                 'title': 'John Carlson Postgame 2/25/15',
670             },
671         },
672         # Eagle.Platform embed (generic URL)
673         {
674             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
675             'info_dict': {
676                 'id': '227304',
677                 'ext': 'mp4',
678                 'title': 'Навальный вышел на свободу',
679                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
680                 'thumbnail': 're:^https?://.*\.jpg$',
681                 'duration': 87,
682                 'view_count': int,
683                 'age_limit': 0,
684             },
685         },
686         # ClipYou (Eagle.Platform) embed (custom URL)
687         {
688             'url': 'http://muz-tv.ru/play/7129/',
689             'info_dict': {
690                 'id': '12820',
691                 'ext': 'mp4',
692                 'title': "'O Sole Mio",
693                 'thumbnail': 're:^https?://.*\.jpg$',
694                 'duration': 216,
695                 'view_count': int,
696             },
697         },
698         # Pladform embed
699         {
700             'url': 'http://muz-tv.ru/kinozal/view/7400/',
701             'info_dict': {
702                 'id': '100183293',
703                 'ext': 'mp4',
704                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
705                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
706                 'thumbnail': 're:^https?://.*\.jpg$',
707                 'duration': 694,
708                 'age_limit': 0,
709             },
710         },
711         # Playwire embed
712         {
713             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
714             'info_dict': {
715                 'id': '3519514',
716                 'ext': 'mp4',
717                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
718                 'thumbnail': 're:^https?://.*\.png$',
719                 'duration': 45.115,
720             },
721         },
722         # 5min embed
723         {
724             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
725             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
726             'info_dict': {
727                 'id': '518726732',
728                 'ext': 'mp4',
729                 'title': 'Facebook Creates "On This Day" | Crunch Report',
730             },
731         },
732         # SVT embed
733         {
734             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
735             'info_dict': {
736                 'id': '2900353',
737                 'ext': 'flv',
738                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
739                 'duration': 27,
740                 'age_limit': 0,
741             },
742         },
743         # Crooks and Liars embed
744         {
745             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
746             'info_dict': {
747                 'id': '8RUoRhRi',
748                 'ext': 'mp4',
749                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
750                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
751                 'timestamp': 1428207000,
752                 'upload_date': '20150405',
753                 'uploader': 'Heather',
754             },
755         },
756         # Crooks and Liars external embed
757         {
758             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
759             'info_dict': {
760                 'id': 'MTE3MjUtMzQ2MzA',
761                 'ext': 'mp4',
762                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
763                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
764                 'timestamp': 1265032391,
765                 'upload_date': '20100201',
766                 'uploader': 'Heather',
767             },
768         },
769         # NBC Sports vplayer embed
770         {
771             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
772             'info_dict': {
773                 'id': 'ln7x1qSThw4k',
774                 'ext': 'flv',
775                 'title': "PFT Live: New leader in the 'new-look' defense",
776                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
777             },
778         },
779         # UDN embed
780         {
781             'url': 'http://www.udn.com/news/story/7314/822787',
782             'md5': 'fd2060e988c326991037b9aff9df21a6',
783             'info_dict': {
784                 'id': '300346',
785                 'ext': 'mp4',
786                 'title': '中一中男師變性 全校師生力挺',
787                 'thumbnail': 're:^https?://.*\.jpg$',
788             }
789         },
790         # Ooyala embed
791         {
792             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
793             'info_dict': {
794                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
795                 'ext': 'mp4',
796                 'description': 'VIDEO: Index/Match versus VLOOKUP.',
797                 'title': 'This is what separates the Excel masters from the wannabes',
798             },
799             'params': {
800                 # m3u8 downloads
801                 'skip_download': True,
802             }
803         },
804         # Contains a SMIL manifest
805         {
806             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
807             'info_dict': {
808                 'id': 'file',
809                 'ext': 'flv',
810                 'title': '+ Football: Lottery Champions League Europe',
811                 'uploader': 'www.telewebion.com',
812             },
813             'params': {
814                 # rtmpe downloads
815                 'skip_download': True,
816             }
817         },
818         # Brightcove URL in single quotes
819         {
820             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
821             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
822             'info_dict': {
823                 'id': '4255764656001',
824                 'ext': 'mp4',
825                 'title': 'SN Presents: Russell Martin, World Citizen',
826                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
827                 'uploader': 'Rogers Sportsnet',
828             },
829         },
830         # Dailymotion Cloud video
831         {
832             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
833             'md5': '49444254273501a64675a7e68c502681',
834             'info_dict': {
835                 'id': '5585de919473990de4bee11b',
836                 'ext': 'mp4',
837                 'title': 'Le débat',
838                 'thumbnail': 're:^https?://.*\.jpe?g$',
839             }
840         },
841         # OnionStudios embed
842         {
843             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
844             'info_dict': {
845                 'id': '2855',
846                 'ext': 'mp4',
847                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
848                 'thumbnail': 're:^https?://.*\.jpe?g$',
849                 'uploader': 'ClickHole',
850                 'uploader_id': 'clickhole',
851             }
852         },
853         # AdobeTVVideo embed
854         {
855             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
856             'md5': '43662b577c018ad707a63766462b1e87',
857             'info_dict': {
858                 'id': '2456',
859                 'ext': 'mp4',
860                 'title': 'New experience with Acrobat DC',
861                 'description': 'New experience with Acrobat DC',
862                 'duration': 248.667,
863             },
864         }
865     ]
866
867     def report_following_redirect(self, new_url):
868         """Report information extraction."""
869         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
870
871     def _extract_rss(self, url, video_id, doc):
872         playlist_title = doc.find('./channel/title').text
873         playlist_desc_el = doc.find('./channel/description')
874         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
875
876         entries = []
877         for it in doc.findall('./channel/item'):
878             next_url = xpath_text(it, 'link', fatal=False)
879             if not next_url:
880                 enclosure_nodes = it.findall('./enclosure')
881                 for e in enclosure_nodes:
882                     next_url = e.attrib.get('url')
883                     if next_url:
884                         break
885
886             if not next_url:
887                 continue
888
889             entries.append({
890                 '_type': 'url',
891                 'url': next_url,
892                 'title': it.find('title').text,
893             })
894
895         return {
896             '_type': 'playlist',
897             'id': url,
898             'title': playlist_title,
899             'description': playlist_desc,
900             'entries': entries,
901         }
902
903     def _extract_camtasia(self, url, video_id, webpage):
904         """ Returns None if no camtasia video can be found. """
905
906         camtasia_cfg = self._search_regex(
907             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
908             webpage, 'camtasia configuration file', default=None)
909         if camtasia_cfg is None:
910             return None
911
912         title = self._html_search_meta('DC.title', webpage, fatal=True)
913
914         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
915         camtasia_cfg = self._download_xml(
916             camtasia_url, video_id,
917             note='Downloading camtasia configuration',
918             errnote='Failed to download camtasia configuration')
919         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
920
921         entries = []
922         for n in fileset_node.getchildren():
923             url_n = n.find('./uri')
924             if url_n is None:
925                 continue
926
927             entries.append({
928                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
929                 'title': '%s - %s' % (title, n.tag),
930                 'url': compat_urlparse.urljoin(url, url_n.text),
931                 'duration': float_or_none(n.find('./duration').text),
932             })
933
934         return {
935             '_type': 'playlist',
936             'entries': entries,
937             'title': title,
938         }
939
940     def _real_extract(self, url):
941         if url.startswith('//'):
942             return {
943                 '_type': 'url',
944                 'url': self.http_scheme() + url,
945             }
946
947         parsed_url = compat_urlparse.urlparse(url)
948         if not parsed_url.scheme:
949             default_search = self._downloader.params.get('default_search')
950             if default_search is None:
951                 default_search = 'fixup_error'
952
953             if default_search in ('auto', 'auto_warning', 'fixup_error'):
954                 if '/' in url:
955                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
956                     return self.url_result('http://' + url)
957                 elif default_search != 'fixup_error':
958                     if default_search == 'auto_warning':
959                         if re.match(r'^(?:url|URL)$', url):
960                             raise ExtractorError(
961                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
962                                 expected=True)
963                         else:
964                             self._downloader.report_warning(
965                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
966                     return self.url_result('ytsearch:' + url)
967
968             if default_search in ('error', 'fixup_error'):
969                 raise ExtractorError(
970                     '%r is not a valid URL. '
971                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
972                     % (url, url), expected=True)
973             else:
974                 if ':' not in default_search:
975                     default_search += ':'
976                 return self.url_result(default_search + url)
977
978         url, smuggled_data = unsmuggle_url(url)
979         force_videoid = None
980         is_intentional = smuggled_data and smuggled_data.get('to_generic')
981         if smuggled_data and 'force_videoid' in smuggled_data:
982             force_videoid = smuggled_data['force_videoid']
983             video_id = force_videoid
984         else:
985             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
986
987         self.to_screen('%s: Requesting header' % video_id)
988
989         head_req = HEADRequest(url)
990         head_response = self._request_webpage(
991             head_req, video_id,
992             note=False, errnote='Could not send HEAD request to %s' % url,
993             fatal=False)
994
995         if head_response is not False:
996             # Check for redirect
997             new_url = head_response.geturl()
998             if url != new_url:
999                 self.report_following_redirect(new_url)
1000                 if force_videoid:
1001                     new_url = smuggle_url(
1002                         new_url, {'force_videoid': force_videoid})
1003                 return self.url_result(new_url)
1004
1005         full_response = None
1006         if head_response is False:
1007             request = compat_urllib_request.Request(url)
1008             request.add_header('Accept-Encoding', '*')
1009             full_response = self._request_webpage(request, video_id)
1010             head_response = full_response
1011
1012         # Check for direct link to a video
1013         content_type = head_response.headers.get('Content-Type', '')
1014         m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
1015         if m:
1016             upload_date = unified_strdate(
1017                 head_response.headers.get('Last-Modified'))
1018             return {
1019                 'id': video_id,
1020                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1021                 'direct': True,
1022                 'formats': [{
1023                     'format_id': m.group('format_id'),
1024                     'url': url,
1025                     'vcodec': 'none' if m.group('type') == 'audio' else None
1026                 }],
1027                 'upload_date': upload_date,
1028             }
1029
1030         if not self._downloader.params.get('test', False) and not is_intentional:
1031             force = self._downloader.params.get('force_generic_extractor', False)
1032             self._downloader.report_warning(
1033                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1034
1035         if not full_response:
1036             request = compat_urllib_request.Request(url)
1037             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1038             # making it impossible to download only chunk of the file (yet we need only 512kB to
1039             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1040             # that will always result in downloading the whole file that is not desirable.
1041             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1042             # to accept raw bytes and being able to download only a chunk.
1043             # It may probably better to solve this by checking Content-Type for application/octet-stream
1044             # after HEAD request finishes, but not sure if we can rely on this.
1045             request.add_header('Accept-Encoding', '*')
1046             full_response = self._request_webpage(request, video_id)
1047
1048         # Maybe it's a direct link to a video?
1049         # Be careful not to download the whole thing!
1050         first_bytes = full_response.read(512)
1051         if not is_html(first_bytes):
1052             self._downloader.report_warning(
1053                 'URL could be a direct video link, returning it as such.')
1054             upload_date = unified_strdate(
1055                 head_response.headers.get('Last-Modified'))
1056             return {
1057                 'id': video_id,
1058                 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1059                 'direct': True,
1060                 'url': url,
1061                 'upload_date': upload_date,
1062             }
1063
1064         webpage = self._webpage_read_content(
1065             full_response, url, video_id, prefix=first_bytes)
1066
1067         self.report_extraction(video_id)
1068
1069         # Is it an RSS feed?
1070         try:
1071             doc = parse_xml(webpage)
1072             if doc.tag == 'rss':
1073                 return self._extract_rss(url, video_id, doc)
1074         except compat_xml_parse_error:
1075             pass
1076
1077         # Is it a Camtasia project?
1078         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1079         if camtasia_res is not None:
1080             return camtasia_res
1081
1082         # Sometimes embedded video player is hidden behind percent encoding
1083         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1084         # Unescaping the whole page allows to handle those cases in a generic way
1085         webpage = compat_urllib_parse.unquote(webpage)
1086
1087         # it's tempting to parse this further, but you would
1088         # have to take into account all the variations like
1089         #   Video Title - Site Name
1090         #   Site Name | Video Title
1091         #   Video Title - Tagline | Site Name
1092         # and so on and so forth; it's just not practical
1093         video_title = self._html_search_regex(
1094             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1095             default='video')
1096
1097         # Try to detect age limit automatically
1098         age_limit = self._rta_search(webpage)
1099         # And then there are the jokers who advertise that they use RTA,
1100         # but actually don't.
1101         AGE_LIMIT_MARKERS = [
1102             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1103         ]
1104         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1105             age_limit = 18
1106
1107         # video uploader is domain name
1108         video_uploader = self._search_regex(
1109             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1110
1111         # Helper method
1112         def _playlist_from_matches(matches, getter=None, ie=None):
1113             urlrs = orderedSet(
1114                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1115                 for m in matches)
1116             return self.playlist_result(
1117                 urlrs, playlist_id=video_id, playlist_title=video_title)
1118
1119         # Look for BrightCove:
1120         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
1121         if bc_urls:
1122             self.to_screen('Brightcove video detected.')
1123             entries = [{
1124                 '_type': 'url',
1125                 'url': smuggle_url(bc_url, {'Referer': url}),
1126                 'ie_key': 'Brightcove'
1127             } for bc_url in bc_urls]
1128
1129             return {
1130                 '_type': 'playlist',
1131                 'title': video_title,
1132                 'id': video_id,
1133                 'entries': entries,
1134             }
1135
1136         # Look for embedded rtl.nl player
1137         matches = re.findall(
1138             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1139             webpage)
1140         if matches:
1141             return _playlist_from_matches(matches, ie='RtlNl')
1142
1143         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1144         if vimeo_url is not None:
1145             return self.url_result(vimeo_url)
1146
1147         # Look for embedded YouTube player
1148         matches = re.findall(r'''(?x)
1149             (?:
1150                 <iframe[^>]+?src=|
1151                 data-video-url=|
1152                 <embed[^>]+?src=|
1153                 embedSWF\(?:\s*|
1154                 new\s+SWFObject\(
1155             )
1156             (["\'])
1157                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1158                 (?:embed|v|p)/.+?)
1159             \1''', webpage)
1160         if matches:
1161             return _playlist_from_matches(
1162                 matches, lambda m: unescapeHTML(m[1]))
1163
1164         # Look for lazyYT YouTube embed
1165         matches = re.findall(
1166             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1167         if matches:
1168             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1169
1170         # Look for embedded Dailymotion player
1171         matches = re.findall(
1172             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
1173         if matches:
1174             return _playlist_from_matches(
1175                 matches, lambda m: unescapeHTML(m[1]))
1176
1177         # Look for embedded Dailymotion playlist player (#3822)
1178         m = re.search(
1179             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1180         if m:
1181             playlists = re.findall(
1182                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1183             if playlists:
1184                 return _playlist_from_matches(
1185                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1186
1187         # Look for embedded Wistia player
1188         match = re.search(
1189             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1190         if match:
1191             embed_url = self._proto_relative_url(
1192                 unescapeHTML(match.group('url')))
1193             return {
1194                 '_type': 'url_transparent',
1195                 'url': embed_url,
1196                 'ie_key': 'Wistia',
1197                 'uploader': video_uploader,
1198                 'title': video_title,
1199                 'id': video_id,
1200             }
1201
1202         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1203         if match:
1204             return {
1205                 '_type': 'url_transparent',
1206                 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
1207                 'ie_key': 'Wistia',
1208                 'uploader': video_uploader,
1209                 'title': video_title,
1210                 'id': match.group('id')
1211             }
1212
1213         # Look for embedded blip.tv player
1214         bliptv_url = BlipTVIE._extract_url(webpage)
1215         if bliptv_url:
1216             return self.url_result(bliptv_url, 'BlipTV')
1217
1218         # Look for SVT player
1219         svt_url = SVTIE._extract_url(webpage)
1220         if svt_url:
1221             return self.url_result(svt_url, 'SVT')
1222
1223         # Look for embedded condenast player
1224         matches = re.findall(
1225             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1226             webpage)
1227         if matches:
1228             return {
1229                 '_type': 'playlist',
1230                 'entries': [{
1231                     '_type': 'url',
1232                     'ie_key': 'CondeNast',
1233                     'url': ma,
1234                 } for ma in matches],
1235                 'title': video_title,
1236                 'id': video_id,
1237             }
1238
1239         # Look for Bandcamp pages with custom domain
1240         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1241         if mobj is not None:
1242             burl = unescapeHTML(mobj.group(1))
1243             # Don't set the extractor because it can be a track url or an album
1244             return self.url_result(burl)
1245
1246         # Look for embedded Vevo player
1247         mobj = re.search(
1248             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1249         if mobj is not None:
1250             return self.url_result(mobj.group('url'))
1251
1252         # Look for embedded Viddler player
1253         mobj = re.search(
1254             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1255             webpage)
1256         if mobj is not None:
1257             return self.url_result(mobj.group('url'))
1258
1259         # Look for NYTimes player
1260         mobj = re.search(
1261             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1262             webpage)
1263         if mobj is not None:
1264             return self.url_result(mobj.group('url'))
1265
1266         # Look for Libsyn player
1267         mobj = re.search(
1268             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1269         if mobj is not None:
1270             return self.url_result(mobj.group('url'))
1271
1272         # Look for Ooyala videos
1273         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1274                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1275                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1276                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1277         if mobj is not None:
1278             return OoyalaIE._build_url_result(mobj.group('ec'))
1279
1280         # Look for multiple Ooyala embeds on SBN network websites
1281         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1282         if mobj is not None:
1283             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1284             if embeds:
1285                 return _playlist_from_matches(
1286                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
1287
1288         # Look for Aparat videos
1289         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1290         if mobj is not None:
1291             return self.url_result(mobj.group(1), 'Aparat')
1292
1293         # Look for MPORA videos
1294         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1295         if mobj is not None:
1296             return self.url_result(mobj.group(1), 'Mpora')
1297
1298         # Look for embedded NovaMov-based player
1299         mobj = re.search(
1300             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1301                     (?P<url>http://(?:(?:embed|www)\.)?
1302                         (?:novamov\.com|
1303                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1304                            videoweed\.(?:es|com)|
1305                            movshare\.(?:net|sx|ag)|
1306                            divxstage\.(?:eu|net|ch|co|at|ag))
1307                         /embed\.php.+?)\1''', webpage)
1308         if mobj is not None:
1309             return self.url_result(mobj.group('url'))
1310
1311         # Look for embedded Facebook player
1312         mobj = re.search(
1313             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1314         if mobj is not None:
1315             return self.url_result(mobj.group('url'), 'Facebook')
1316
1317         # Look for embedded VK player
1318         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1319         if mobj is not None:
1320             return self.url_result(mobj.group('url'), 'VK')
1321
1322         # Look for embedded ivi player
1323         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1324         if mobj is not None:
1325             return self.url_result(mobj.group('url'), 'Ivi')
1326
1327         # Look for embedded Huffington Post player
1328         mobj = re.search(
1329             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1330         if mobj is not None:
1331             return self.url_result(mobj.group('url'), 'HuffPost')
1332
1333         # Look for embed.ly
1334         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1335         if mobj is not None:
1336             return self.url_result(mobj.group('url'))
1337         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1338         if mobj is not None:
1339             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
1340
1341         # Look for funnyordie embed
1342         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1343         if matches:
1344             return _playlist_from_matches(
1345                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1346
1347         # Look for BBC iPlayer embed
1348         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1349         if matches:
1350             return _playlist_from_matches(matches, ie='BBCCoUk')
1351
1352         # Look for embedded RUTV player
1353         rutv_url = RUTVIE._extract_url(webpage)
1354         if rutv_url:
1355             return self.url_result(rutv_url, 'RUTV')
1356
1357         # Look for embedded TVC player
1358         tvc_url = TVCIE._extract_url(webpage)
1359         if tvc_url:
1360             return self.url_result(tvc_url, 'TVC')
1361
1362         # Look for embedded SportBox player
1363         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1364         if sportbox_urls:
1365             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1366
1367         # Look for embedded PornHub player
1368         pornhub_url = PornHubIE._extract_url(webpage)
1369         if pornhub_url:
1370             return self.url_result(pornhub_url, 'PornHub')
1371
1372         # Look for embedded XHamster player
1373         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1374         if xhamster_urls:
1375             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1376
1377         # Look for embedded Tvigle player
1378         mobj = re.search(
1379             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1380         if mobj is not None:
1381             return self.url_result(mobj.group('url'), 'Tvigle')
1382
1383         # Look for embedded TED player
1384         mobj = re.search(
1385             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1386         if mobj is not None:
1387             return self.url_result(mobj.group('url'), 'TED')
1388
1389         # Look for embedded Ustream videos
1390         mobj = re.search(
1391             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1392         if mobj is not None:
1393             return self.url_result(mobj.group('url'), 'Ustream')
1394
1395         # Look for embedded arte.tv player
1396         mobj = re.search(
1397             r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
1398             webpage)
1399         if mobj is not None:
1400             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1401
1402         # Look for embedded smotri.com player
1403         smotri_url = SmotriIE._extract_url(webpage)
1404         if smotri_url:
1405             return self.url_result(smotri_url, 'Smotri')
1406
1407         # Look for embeded soundcloud player
1408         mobj = re.search(
1409             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1410             webpage)
1411         if mobj is not None:
1412             url = unescapeHTML(mobj.group('url'))
1413             return self.url_result(url)
1414
1415         # Look for embedded vulture.com player
1416         mobj = re.search(
1417             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1418             webpage)
1419         if mobj is not None:
1420             url = unescapeHTML(mobj.group('url'))
1421             return self.url_result(url, ie='Vulture')
1422
1423         # Look for embedded mtvservices player
1424         mobj = re.search(
1425             r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
1426             webpage)
1427         if mobj is not None:
1428             url = unescapeHTML(mobj.group('url'))
1429             return self.url_result(url, ie='MTVServicesEmbedded')
1430
1431         # Look for embedded yahoo player
1432         mobj = re.search(
1433             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1434             webpage)
1435         if mobj is not None:
1436             return self.url_result(mobj.group('url'), 'Yahoo')
1437
1438         # Look for embedded sbs.com.au player
1439         mobj = re.search(
1440             r'''(?x)
1441             (?:
1442                 <meta\s+property="og:video"\s+content=|
1443                 <iframe[^>]+?src=
1444             )
1445             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1446             webpage)
1447         if mobj is not None:
1448             return self.url_result(mobj.group('url'), 'SBS')
1449
1450         # Look for embedded Cinchcast player
1451         mobj = re.search(
1452             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1453             webpage)
1454         if mobj is not None:
1455             return self.url_result(mobj.group('url'), 'Cinchcast')
1456
1457         mobj = re.search(
1458             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1459             webpage)
1460         if not mobj:
1461             mobj = re.search(
1462                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1463                 webpage)
1464         if mobj is not None:
1465             return self.url_result(mobj.group('url'), 'MLB')
1466
1467         mobj = re.search(
1468             r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1469             webpage)
1470         if mobj is not None:
1471             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1472
1473         mobj = re.search(
1474             r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1475             webpage)
1476         if mobj is not None:
1477             return self.url_result(mobj.group('url'), 'Livestream')
1478
1479         # Look for Zapiks embed
1480         mobj = re.search(
1481             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1482         if mobj is not None:
1483             return self.url_result(mobj.group('url'), 'Zapiks')
1484
1485         # Look for Kaltura embeds
1486         mobj = re.search(
1487             r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1488         if mobj is not None:
1489             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1490
1491         # Look for Eagle.Platform embeds
1492         mobj = re.search(
1493             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1494         if mobj is not None:
1495             return self.url_result(mobj.group('url'), 'EaglePlatform')
1496
1497         # Look for ClipYou (uses Eagle.Platform) embeds
1498         mobj = re.search(
1499             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1500         if mobj is not None:
1501             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1502
1503         # Look for Pladform embeds
1504         mobj = re.search(
1505             r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1506         if mobj is not None:
1507             return self.url_result(mobj.group('url'), 'Pladform')
1508
1509         # Look for Playwire embeds
1510         mobj = re.search(
1511             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1512         if mobj is not None:
1513             return self.url_result(mobj.group('url'))
1514
1515         # Look for 5min embeds
1516         mobj = re.search(
1517             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1518         if mobj is not None:
1519             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1520
1521         # Look for Crooks and Liars embeds
1522         mobj = re.search(
1523             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1524         if mobj is not None:
1525             return self.url_result(mobj.group('url'))
1526
1527         # Look for NBC Sports VPlayer embeds
1528         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1529         if nbc_sports_url:
1530             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1531
1532         # Look for UDN embeds
1533         mobj = re.search(
1534             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1535         if mobj is not None:
1536             return self.url_result(
1537                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1538
1539         # Look for Senate ISVP iframe
1540         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1541         if senate_isvp_url:
1542             return self.url_result(senate_isvp_url, 'SenateISVP')
1543
1544         # Look for Dailymotion Cloud videos
1545         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1546         if dmcloud_url:
1547             return self.url_result(dmcloud_url, 'DailymotionCloud')
1548
1549         # Look for OnionStudios embeds
1550         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1551         if onionstudios_url:
1552             return self.url_result(onionstudios_url)
1553
1554         # Look for SnagFilms embeds
1555         snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
1556         if snagfilms_url:
1557             return self.url_result(snagfilms_url)
1558
1559         # Look for AdobeTVVideo embeds
1560         mobj = re.search(
1561             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
1562             webpage)
1563         if mobj is not None:
1564             return self.url_result(
1565                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
1566                 'AdobeTVVideo')
1567
1568         def check_video(vurl):
1569             if YoutubeIE.suitable(vurl):
1570                 return True
1571             vpath = compat_urlparse.urlparse(vurl).path
1572             vext = determine_ext(vpath)
1573             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1574
1575         def filter_video(urls):
1576             return list(filter(check_video, urls))
1577
1578         # Start with something easy: JW Player in SWFObject
1579         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1580         if not found:
1581             # Look for gorilla-vid style embedding
1582             found = filter_video(re.findall(r'''(?sx)
1583                 (?:
1584                     jw_plugins|
1585                     JWPlayerOptions|
1586                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
1587                 )
1588                 .*?
1589                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1590         if not found:
1591             # Broaden the search a little bit
1592             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
1593         if not found:
1594             # Broaden the findall a little bit: JWPlayer JS loader
1595             found = filter_video(re.findall(
1596                 r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1597         if not found:
1598             # Flow player
1599             found = filter_video(re.findall(r'''(?xs)
1600                 flowplayer\("[^"]+",\s*
1601                     \{[^}]+?\}\s*,
1602                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
1603                         ["']?url["']?\s*:\s*["']([^"']+)["']
1604             ''', webpage))
1605         if not found:
1606             # Cinerama player
1607             found = re.findall(
1608                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
1609         if not found:
1610             # Try to find twitter cards info
1611             found = filter_video(re.findall(
1612                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
1613         if not found:
1614             # We look for Open Graph info:
1615             # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
1616             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
1617             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1618             if m_video_type is not None:
1619                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
1620         if not found:
1621             # HTML5 video
1622             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
1623         if not found:
1624             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1625             found = re.search(
1626                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
1627                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
1628                 webpage)
1629             if not found:
1630                 # Look also in Refresh HTTP header
1631                 refresh_header = head_response.headers.get('Refresh')
1632                 if refresh_header:
1633                     found = re.search(REDIRECT_REGEX, refresh_header)
1634             if found:
1635                 new_url = compat_urlparse.urljoin(url, found.group(1))
1636                 self.report_following_redirect(new_url)
1637                 return {
1638                     '_type': 'url',
1639                     'url': new_url,
1640                 }
1641         if not found:
1642             raise UnsupportedError(url)
1643
1644         entries = []
1645         for video_url in found:
1646             video_url = compat_urlparse.urljoin(url, video_url)
1647             video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1648
1649             # Sometimes, jwplayer extraction will result in a YouTube URL
1650             if YoutubeIE.suitable(video_url):
1651                 entries.append(self.url_result(video_url, 'Youtube'))
1652                 continue
1653
1654             # here's a fun little line of code for you:
1655             video_id = os.path.splitext(video_id)[0]
1656
1657             if determine_ext(video_url) == 'smil':
1658                 entries.append({
1659                     'id': video_id,
1660                     'formats': self._extract_smil_formats(video_url, video_id),
1661                     'uploader': video_uploader,
1662                     'title': video_title,
1663                     'age_limit': age_limit,
1664                 })
1665             else:
1666                 entries.append({
1667                     'id': video_id,
1668                     'url': video_url,
1669                     'uploader': video_uploader,
1670                     'title': video_title,
1671                     'age_limit': age_limit,
1672                 })
1673
1674         if len(entries) == 1:
1675             return entries[0]
1676         else:
1677             for num, e in enumerate(entries, start=1):
1678                 # 'url' results don't have a title
1679                 if e.get('title') is not None:
1680                     e['title'] = '%s (%d)' % (e['title'], num)
1681             return {
1682                 '_type': 'playlist',
1683                 'entries': entries,
1684             }