[extractor/generic] Improve kaltura embed detection (Closes #9911)
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     sanitized_Request,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import (
34     BrightcoveLegacyIE,
35     BrightcoveNewIE,
36 )
37 from .nbc import NBCSportsVPlayerIE
38 from .ooyala import OoyalaIE
39 from .rutv import RUTVIE
40 from .tvc import TVCIE
41 from .sportbox import SportBoxEmbedIE
42 from .smotri import SmotriIE
43 from .myvi import MyviIE
44 from .condenast import CondeNastIE
45 from .udn import UDNEmbedIE
46 from .senateisvp import SenateISVPIE
47 from .svt import SVTIE
48 from .pornhub import PornHubIE
49 from .xhamster import XHamsterEmbedIE
50 from .tnaflix import TNAFlixNetworkEmbedIE
51 from .vimeo import VimeoIE
52 from .dailymotion import DailymotionCloudIE
53 from .onionstudios import OnionStudiosIE
54 from .viewlift import ViewLiftEmbedIE
55 from .screenwavemedia import ScreenwaveMediaIE
56 from .mtv import MTVServicesEmbeddedIE
57 from .pladform import PladformIE
58 from .videomore import VideomoreIE
59 from .googledrive import GoogleDriveIE
60 from .jwplatform import JWPlatformIE
61 from .digiteka import DigitekaIE
62 from .instagram import InstagramIE
63 from .liveleak import LiveLeakIE
64 from .threeqsdn import ThreeQSDNIE
65 from .theplatform import ThePlatformIE
66 from .vessel import VesselIE
67
68
69 class GenericIE(InfoExtractor):
70     IE_DESC = 'Generic downloader that works on some sites'
71     _VALID_URL = r'.*'
72     IE_NAME = 'generic'
73     _TESTS = [
74         # Direct link to a video
75         {
76             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
77             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
78             'info_dict': {
79                 'id': 'trailer',
80                 'ext': 'mp4',
81                 'title': 'trailer',
82                 'upload_date': '20100513',
83             }
84         },
85         # Direct link to media delivered compressed (until Accept-Encoding is *)
86         {
87             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
88             'md5': '128c42e68b13950268b648275386fc74',
89             'info_dict': {
90                 'id': 'FictionJunction-Parallel_Hearts',
91                 'ext': 'flac',
92                 'title': 'FictionJunction-Parallel_Hearts',
93                 'upload_date': '20140522',
94             },
95             'expected_warnings': [
96                 'URL could be a direct video link, returning it as such.'
97             ]
98         },
99         # Direct download with broken HEAD
100         {
101             'url': 'http://ai-radio.org:8000/radio.opus',
102             'info_dict': {
103                 'id': 'radio',
104                 'ext': 'opus',
105                 'title': 'radio',
106             },
107             'params': {
108                 'skip_download': True,  # infinite live stream
109             },
110             'expected_warnings': [
111                 r'501.*Not Implemented',
112                 r'400.*Bad Request',
113             ],
114         },
115         # Direct link with incorrect MIME type
116         {
117             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
118             'md5': '4ccbebe5f36706d85221f204d7eb5913',
119             'info_dict': {
120                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
121                 'id': '5_Lennart_Poettering_-_Systemd',
122                 'ext': 'webm',
123                 'title': '5_Lennart_Poettering_-_Systemd',
124                 'upload_date': '20141120',
125             },
126             'expected_warnings': [
127                 'URL could be a direct video link, returning it as such.'
128             ]
129         },
130         # RSS feed
131         {
132             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
133             'info_dict': {
134                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
135                 'title': 'Zero Punctuation',
136                 'description': 're:.*groundbreaking video review series.*'
137             },
138             'playlist_mincount': 11,
139         },
140         # RSS feed with enclosure
141         {
142             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
143             'info_dict': {
144                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
145                 'ext': 'm4v',
146                 'upload_date': '20150228',
147                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
148             }
149         },
150         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
151         {
152             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
153             'info_dict': {
154                 'id': 'smil',
155                 'ext': 'mp4',
156                 'title': 'Automatics, robotics and biocybernetics',
157                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
158                 'upload_date': '20130627',
159                 'formats': 'mincount:16',
160                 'subtitles': 'mincount:1',
161             },
162             'params': {
163                 'force_generic_extractor': True,
164                 'skip_download': True,
165             },
166         },
167         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
168         {
169             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
170             'info_dict': {
171                 'id': 'hds',
172                 'ext': 'flv',
173                 'title': 'hds',
174                 'formats': 'mincount:1',
175             },
176             'params': {
177                 'skip_download': True,
178             },
179         },
180         # SMIL from https://www.restudy.dk/video/play/id/1637
181         {
182             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
183             'info_dict': {
184                 'id': 'video_1637',
185                 'ext': 'flv',
186                 'title': 'video_1637',
187                 'formats': 'mincount:3',
188             },
189             'params': {
190                 'skip_download': True,
191             },
192         },
193         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
194         {
195             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
196             'info_dict': {
197                 'id': 'smil-service',
198                 'ext': 'flv',
199                 'title': 'smil-service',
200                 'formats': 'mincount:1',
201             },
202             'params': {
203                 'skip_download': True,
204             },
205         },
206         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
207         {
208             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
209             'info_dict': {
210                 'id': '4719370',
211                 'ext': 'mp4',
212                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
213                 'formats': 'mincount:3',
214             },
215             'params': {
216                 'skip_download': True,
217             },
218         },
219         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
220         {
221             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
222             'info_dict': {
223                 'id': 'mZlp2ctYIUEB',
224                 'ext': 'mp4',
225                 'title': 'Tikibad ontruimd wegens brand',
226                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
227                 'thumbnail': 're:^https?://.*\.jpg$',
228                 'duration': 33,
229             },
230             'params': {
231                 'skip_download': True,
232             },
233         },
234         # MPD from http://dash-mse-test.appspot.com/media.html
235         {
236             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
237             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
238             'info_dict': {
239                 'id': 'car-20120827-manifest',
240                 'ext': 'mp4',
241                 'title': 'car-20120827-manifest',
242                 'formats': 'mincount:9',
243                 'upload_date': '20130904',
244             },
245             'params': {
246                 'format': 'bestvideo',
247             },
248         },
249         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
250         {
251             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
252             'info_dict': {
253                 'id': 'content',
254                 'ext': 'mp4',
255                 'title': 'content',
256                 'formats': 'mincount:8',
257             },
258             'params': {
259                 # m3u8 downloads
260                 'skip_download': True,
261             }
262         },
263         # m3u8 served with Content-Type: text/plain
264         {
265             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
266             'info_dict': {
267                 'id': 'index',
268                 'ext': 'mp4',
269                 'title': 'index',
270                 'upload_date': '20140720',
271                 'formats': 'mincount:11',
272             },
273             'params': {
274                 # m3u8 downloads
275                 'skip_download': True,
276             }
277         },
278         # google redirect
279         {
280             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
281             'info_dict': {
282                 'id': 'cmQHVoWB5FY',
283                 'ext': 'mp4',
284                 'upload_date': '20130224',
285                 'uploader_id': 'TheVerge',
286                 'description': 're:^Chris Ziegler takes a look at the\.*',
287                 'uploader': 'The Verge',
288                 'title': 'First Firefox OS phones side-by-side',
289             },
290             'params': {
291                 'skip_download': False,
292             }
293         },
294         {
295             # redirect in Refresh HTTP header
296             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
297             'info_dict': {
298                 'id': 'pO8h3EaFRdo',
299                 'ext': 'mp4',
300                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
301                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
302                 'upload_date': '20150917',
303                 'uploader_id': 'brtvofficial',
304                 'uploader': 'Boiler Room',
305             },
306             'params': {
307                 'skip_download': False,
308             },
309         },
310         {
311             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
312             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
313             'info_dict': {
314                 'id': '13601338388002',
315                 'ext': 'mp4',
316                 'uploader': 'www.hodiho.fr',
317                 'title': 'R\u00e9gis plante sa Jeep',
318             }
319         },
320         # bandcamp page with custom domain
321         {
322             'add_ie': ['Bandcamp'],
323             'url': 'http://bronyrock.com/track/the-pony-mash',
324             'info_dict': {
325                 'id': '3235767654',
326                 'ext': 'mp3',
327                 'title': 'The Pony Mash',
328                 'uploader': 'M_Pallante',
329             },
330             'skip': 'There is a limit of 200 free downloads / month for the test song',
331         },
332         # embedded brightcove video
333         # it also tests brightcove videos that need to set the 'Referer' in the
334         # http requests
335         {
336             'add_ie': ['BrightcoveLegacy'],
337             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
338             'info_dict': {
339                 'id': '2765128793001',
340                 'ext': 'mp4',
341                 'title': 'Le cours de bourse : l’analyse technique',
342                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
343                 'uploader': 'BFM BUSINESS',
344             },
345             'params': {
346                 'skip_download': True,
347             },
348         },
349         {
350             # https://github.com/rg3/youtube-dl/issues/2253
351             'url': 'http://bcove.me/i6nfkrc3',
352             'md5': '0ba9446db037002366bab3b3eb30c88c',
353             'info_dict': {
354                 'id': '3101154703001',
355                 'ext': 'mp4',
356                 'title': 'Still no power',
357                 'uploader': 'thestar.com',
358                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
359             },
360             'add_ie': ['BrightcoveLegacy'],
361         },
362         {
363             'url': 'http://www.championat.com/video/football/v/87/87499.html',
364             'md5': 'fb973ecf6e4a78a67453647444222983',
365             'info_dict': {
366                 'id': '3414141473001',
367                 'ext': 'mp4',
368                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
369                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
370                 'uploader': 'Championat',
371             },
372         },
373         {
374             # https://github.com/rg3/youtube-dl/issues/3541
375             'add_ie': ['BrightcoveLegacy'],
376             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
377             'info_dict': {
378                 'id': '3866516442001',
379                 'ext': 'mp4',
380                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
381                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
382                 'uploader': 'SBS Broadcasting',
383             },
384             'skip': 'Restricted to Netherlands',
385             'params': {
386                 'skip_download': True,  # m3u8 download
387             },
388         },
389         # ooyala video
390         {
391             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
392             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
393             'info_dict': {
394                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
395                 'ext': 'mp4',
396                 'title': '2cc213299525360.mov',  # that's what we get
397                 'duration': 238.231,
398             },
399             'add_ie': ['Ooyala'],
400         },
401         {
402             # ooyala video embedded with http://player.ooyala.com/iframe.js
403             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
404             'info_dict': {
405                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
406                 'ext': 'mp4',
407                 'title': '"Steve Jobs: Man in the Machine" trailer',
408                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
409                 'duration': 135.427,
410             },
411             'params': {
412                 'skip_download': True,
413             },
414         },
415         # embed.ly video
416         {
417             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
418             'info_dict': {
419                 'id': '9ODmcdjQcHQ',
420                 'ext': 'mp4',
421                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
422                 'upload_date': '20140225',
423                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
424                 'uploader': 'Tested',
425                 'uploader_id': 'testedcom',
426             },
427             # No need to test YoutubeIE here
428             'params': {
429                 'skip_download': True,
430             },
431         },
432         # funnyordie embed
433         {
434             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
435             'info_dict': {
436                 'id': '18e820ec3f',
437                 'ext': 'mp4',
438                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
439                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
440             },
441         },
442         # RUTV embed
443         {
444             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
445             'info_dict': {
446                 'id': '776940',
447                 'ext': 'mp4',
448                 'title': 'Охотское море стало целиком российским',
449                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
450             },
451             'params': {
452                 # m3u8 download
453                 'skip_download': True,
454             },
455         },
456         # TVC embed
457         {
458             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
459             'info_dict': {
460                 'id': '55304',
461                 'ext': 'mp4',
462                 'title': 'Дошкольное воспитание',
463             },
464         },
465         # SportBox embed
466         {
467             'url': 'http://www.vestifinance.ru/articles/25753',
468             'info_dict': {
469                 'id': '25753',
470                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
471             },
472             'playlist': [{
473                 'info_dict': {
474                     'id': '370908',
475                     'title': 'Госзаказ. День 3',
476                     'ext': 'mp4',
477                 }
478             }, {
479                 'info_dict': {
480                     'id': '370905',
481                     'title': 'Госзаказ. День 2',
482                     'ext': 'mp4',
483                 }
484             }, {
485                 'info_dict': {
486                     'id': '370902',
487                     'title': 'Госзаказ. День 1',
488                     'ext': 'mp4',
489                 }
490             }],
491             'params': {
492                 # m3u8 download
493                 'skip_download': True,
494             },
495         },
496         # Myvi.ru embed
497         {
498             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
499             'info_dict': {
500                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
501                 'ext': 'mp4',
502                 'title': 'Ужастики, русский трейлер (2015)',
503                 'thumbnail': 're:^https?://.*\.jpg$',
504                 'duration': 153,
505             }
506         },
507         # XHamster embed
508         {
509             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
510             'info_dict': {
511                 'id': 'showthread',
512                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
513             },
514             'playlist_mincount': 7,
515         },
516         # Embedded TED video
517         {
518             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
519             'md5': '65fdff94098e4a607385a60c5177c638',
520             'info_dict': {
521                 'id': '1969',
522                 'ext': 'mp4',
523                 'title': 'Hidden miracles of the natural world',
524                 'uploader': 'Louie Schwartzberg',
525                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
526             }
527         },
528         # Embedded Ustream video
529         {
530             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
531             'md5': '27b99cdb639c9b12a79bca876a073417',
532             'info_dict': {
533                 'id': '45734260',
534                 'ext': 'flv',
535                 'uploader': 'AU SPA:  The NSA and Privacy',
536                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
537             }
538         },
539         # nowvideo embed hidden behind percent encoding
540         {
541             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
542             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
543             'info_dict': {
544                 'id': '06e53103ca9aa',
545                 'ext': 'flv',
546                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
547                 'description': 'No description',
548             },
549         },
550         # arte embed
551         {
552             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
553             'md5': '7653032cbb25bf6c80d80f217055fa43',
554             'info_dict': {
555                 'id': '048195-004_PLUS7-F',
556                 'ext': 'flv',
557                 'title': 'X:enius',
558                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
559                 'upload_date': '20140320',
560             },
561             'params': {
562                 'skip_download': 'Requires rtmpdump'
563             }
564         },
565         # francetv embed
566         {
567             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
568             'info_dict': {
569                 'id': 'EV_30231',
570                 'ext': 'mp4',
571                 'title': 'Alcaline, le concert avec Calogero',
572                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
573                 'upload_date': '20150226',
574                 'timestamp': 1424989860,
575                 'duration': 5400,
576             },
577             'params': {
578                 # m3u8 downloads
579                 'skip_download': True,
580             },
581             'expected_warnings': [
582                 'Forbidden'
583             ]
584         },
585         # Condé Nast embed
586         {
587             'url': 'http://www.wired.com/2014/04/honda-asimo/',
588             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
589             'info_dict': {
590                 'id': '53501be369702d3275860000',
591                 'ext': 'mp4',
592                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
593             }
594         },
595         # Dailymotion embed
596         {
597             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
598             'md5': '441aeeb82eb72c422c7f14ec533999cd',
599             'info_dict': {
600                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
601                 'ext': 'mp4',
602                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
603                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
604                 'uploader': 'Spi0n',
605                 'uploader_id': 'xgditw',
606                 'upload_date': '20140425',
607                 'timestamp': 1398441542,
608             },
609             'add_ie': ['Dailymotion'],
610         },
611         # YouTube embed
612         {
613             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
614             'info_dict': {
615                 'id': 'FXRb4ykk4S0',
616                 'ext': 'mp4',
617                 'title': 'The NBL Auction 2014',
618                 'uploader': 'BADMINTON England',
619                 'uploader_id': 'BADMINTONEvents',
620                 'upload_date': '20140603',
621                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
622             },
623             'add_ie': ['Youtube'],
624             'params': {
625                 'skip_download': True,
626             }
627         },
628         # MTVServices embed
629         {
630             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
631             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
632             'info_dict': {
633                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
634                 'ext': 'mp4',
635                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
636                 'description': 'Two valets share their love for movie star Liam Neesons.',
637             },
638         },
639         # YouTube embed via <data-embed-url="">
640         {
641             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
642             'info_dict': {
643                 'id': '4vAffPZIT44',
644                 'ext': 'mp4',
645                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
646                 'uploader': 'Gameloft',
647                 'uploader_id': 'gameloft',
648                 'upload_date': '20140828',
649                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
650             },
651             'params': {
652                 'skip_download': True,
653             }
654         },
655         # Camtasia studio
656         {
657             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
658             'playlist': [{
659                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
660                 'info_dict': {
661                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
662                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
663                     'ext': 'flv',
664                     'duration': 2235.90,
665                 }
666             }, {
667                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
668                 'info_dict': {
669                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
670                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
671                     'ext': 'flv',
672                     'duration': 2235.93,
673                 }
674             }],
675             'info_dict': {
676                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
677             }
678         },
679         # Flowplayer
680         {
681             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
682             'md5': '9d65602bf31c6e20014319c7d07fba27',
683             'info_dict': {
684                 'id': '5123ea6d5e5a7',
685                 'ext': 'mp4',
686                 'age_limit': 18,
687                 'uploader': 'www.handjobhub.com',
688                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
689             }
690         },
691         # Multiple brightcove videos
692         # https://github.com/rg3/youtube-dl/issues/2283
693         {
694             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
695             'info_dict': {
696                 'id': 'always-never',
697                 'title': 'Always / Never - The New Yorker',
698             },
699             'playlist_count': 3,
700             'params': {
701                 'extract_flat': False,
702                 'skip_download': True,
703             }
704         },
705         # MLB embed
706         {
707             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
708             'md5': '96f09a37e44da40dd083e12d9a683327',
709             'info_dict': {
710                 'id': '33322633',
711                 'ext': 'mp4',
712                 'title': 'Ump changes call to ball',
713                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
714                 'duration': 48,
715                 'timestamp': 1401537900,
716                 'upload_date': '20140531',
717                 'thumbnail': 're:^https?://.*\.jpg$',
718             },
719         },
720         # Wistia embed
721         {
722             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
723             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
724             'info_dict': {
725                 'id': '6e2wtrbdaf',
726                 'ext': 'mov',
727                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
728                 'description': 'a Paywall Videos video from Remilon',
729                 'duration': 644.072,
730                 'uploader': 'study.com',
731                 'timestamp': 1459678540,
732                 'upload_date': '20160403',
733                 'filesize': 24687186,
734             },
735         },
736         {
737             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
738             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
739             'info_dict': {
740                 'id': 'uxjb0lwrcz',
741                 'ext': 'mp4',
742                 'title': 'Conversation about Hexagonal Rails Part 1',
743                 'description': 'a Martin Fowler video from ThoughtWorks',
744                 'duration': 1715.0,
745                 'uploader': 'thoughtworks.wistia.com',
746                 'timestamp': 1401832161,
747                 'upload_date': '20140603',
748             },
749         },
750         # Wistia standard embed (async)
751         {
752             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
753             'info_dict': {
754                 'id': '807fafadvk',
755                 'ext': 'mp4',
756                 'title': 'Drip Brennan Dunn Workshop',
757                 'description': 'a JV Webinars video from getdrip-1',
758                 'duration': 4986.95,
759                 'timestamp': 1463607249,
760                 'upload_date': '20160518',
761             },
762             'params': {
763                 'skip_download': True,
764             }
765         },
766         # Soundcloud embed
767         {
768             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
769             'info_dict': {
770                 'id': '174391317',
771                 'ext': 'mp3',
772                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
773                 'uploader': 'Sophos Security',
774                 'title': 'Chet Chat 171 - Oct 29, 2014',
775                 'upload_date': '20141029',
776             }
777         },
778         # Livestream embed
779         {
780             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
781             'info_dict': {
782                 'id': '67864563',
783                 'ext': 'flv',
784                 'upload_date': '20141112',
785                 'title': 'Rosetta #CometLanding webcast HL 10',
786             }
787         },
788         # Another Livestream embed, without 'new.' in URL
789         {
790             'url': 'https://www.freespeech.org/',
791             'info_dict': {
792                 'id': '123537347',
793                 'ext': 'mp4',
794                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
795             },
796             'params': {
797                 # Live stream
798                 'skip_download': True,
799             },
800         },
801         # LazyYT
802         {
803             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
804             'info_dict': {
805                 'id': '1986',
806                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
807             },
808             'playlist_mincount': 2,
809         },
810         # Cinchcast embed
811         {
812             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
813             'info_dict': {
814                 'id': '7141703',
815                 'ext': 'mp3',
816                 'upload_date': '20141126',
817                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
818             }
819         },
820         # Cinerama player
821         {
822             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
823             'info_dict': {
824                 'id': '730m_DandD_1901_512k',
825                 'ext': 'mp4',
826                 'uploader': 'www.abc.net.au',
827                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
828             }
829         },
830         # embedded viddler video
831         {
832             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
833             'info_dict': {
834                 'id': '4d03aad9',
835                 'ext': 'mp4',
836                 'uploader': 'deadspin',
837                 'title': 'WALL-TO-GORTAT',
838                 'timestamp': 1422285291,
839                 'upload_date': '20150126',
840             },
841             'add_ie': ['Viddler'],
842         },
843         # Libsyn embed
844         {
845             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
846             'info_dict': {
847                 'id': '3377616',
848                 'ext': 'mp3',
849                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
850                 'description': 'md5:601cb790edd05908957dae8aaa866465',
851                 'upload_date': '20150220',
852             },
853         },
854         # jwplayer YouTube
855         {
856             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
857             'info_dict': {
858                 'id': 'Mrj4DVp2zeA',
859                 'ext': 'mp4',
860                 'upload_date': '20150212',
861                 'uploader': 'The National Archives UK',
862                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
863                 'uploader_id': 'NationalArchives08',
864                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
865             },
866         },
867         # rtl.nl embed
868         {
869             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
870             'playlist_mincount': 5,
871             'info_dict': {
872                 'id': 'aanslagen-kopenhagen',
873                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
874             }
875         },
876         # Zapiks embed
877         {
878             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
879             'info_dict': {
880                 'id': '118046',
881                 'ext': 'mp4',
882                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
883             }
884         },
885         # Kaltura embed (different embed code)
886         {
887             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
888             'info_dict': {
889                 'id': '1_a52wc67y',
890                 'ext': 'flv',
891                 'upload_date': '20150127',
892                 'uploader_id': 'PremierMedia',
893                 'timestamp': int,
894                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
895             },
896         },
897         # Kaltura embed protected with referrer
898         {
899             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
900             'info_dict': {
901                 'id': '1_g4fbemnq',
902                 'ext': 'mp4',
903                 'title': 'Violetta - Achter De Schermen - Ruggero',
904                 'description': 'Achter de schermen met Ruggero',
905                 'timestamp': 1435133761,
906                 'upload_date': '20150624',
907                 'uploader_id': 'echojecka',
908             },
909         },
910         # Kaltura embed with single quotes
911         {
912             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
913             'info_dict': {
914                 'id': '0_izeg5utt',
915                 'ext': 'mp4',
916                 'title': '35871',
917                 'timestamp': 1355743100,
918                 'upload_date': '20121217',
919                 'uploader_id': 'batchUser',
920             },
921             'add_ie': ['Kaltura'],
922         },
923         {
924             # Kaltura embedded via quoted entry_id
925             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
926             'info_dict': {
927                 'id': '0_utuok90b',
928                 'ext': 'mp4',
929                 'title': '06_matthew_brender_raj_dutt',
930                 'timestamp': 1466638791,
931                 'upload_date': '20160622',
932             },
933             'add_ie': ['Kaltura'],
934             'expected_warnings': [
935                 'Could not send HEAD request'
936             ],
937             'params': {
938                 'skip_download': True,
939             }
940         },
941         # Eagle.Platform embed (generic URL)
942         {
943             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
944             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
945             'info_dict': {
946                 'id': '227304',
947                 'ext': 'mp4',
948                 'title': 'Навальный вышел на свободу',
949                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
950                 'thumbnail': 're:^https?://.*\.jpg$',
951                 'duration': 87,
952                 'view_count': int,
953                 'age_limit': 0,
954             },
955         },
956         # ClipYou (Eagle.Platform) embed (custom URL)
957         {
958             'url': 'http://muz-tv.ru/play/7129/',
959             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
960             'info_dict': {
961                 'id': '12820',
962                 'ext': 'mp4',
963                 'title': "'O Sole Mio",
964                 'thumbnail': 're:^https?://.*\.jpg$',
965                 'duration': 216,
966                 'view_count': int,
967             },
968         },
969         # Pladform embed
970         {
971             'url': 'http://muz-tv.ru/kinozal/view/7400/',
972             'info_dict': {
973                 'id': '100183293',
974                 'ext': 'mp4',
975                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
976                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
977                 'thumbnail': 're:^https?://.*\.jpg$',
978                 'duration': 694,
979                 'age_limit': 0,
980             },
981         },
982         # Playwire embed
983         {
984             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
985             'info_dict': {
986                 'id': '3519514',
987                 'ext': 'mp4',
988                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
989                 'thumbnail': 're:^https?://.*\.png$',
990                 'duration': 45.115,
991             },
992         },
993         # 5min embed
994         {
995             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
996             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
997             'info_dict': {
998                 'id': '518726732',
999                 'ext': 'mp4',
1000                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1001             },
1002         },
1003         # SVT embed
1004         {
1005             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1006             'info_dict': {
1007                 'id': '2900353',
1008                 'ext': 'flv',
1009                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1010                 'duration': 27,
1011                 'age_limit': 0,
1012             },
1013         },
1014         # Crooks and Liars embed
1015         {
1016             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1017             'info_dict': {
1018                 'id': '8RUoRhRi',
1019                 'ext': 'mp4',
1020                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1021                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1022                 'timestamp': 1428207000,
1023                 'upload_date': '20150405',
1024                 'uploader': 'Heather',
1025             },
1026         },
1027         # Crooks and Liars external embed
1028         {
1029             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1030             'info_dict': {
1031                 'id': 'MTE3MjUtMzQ2MzA',
1032                 'ext': 'mp4',
1033                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1034                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1035                 'timestamp': 1265032391,
1036                 'upload_date': '20100201',
1037                 'uploader': 'Heather',
1038             },
1039         },
1040         # NBC Sports vplayer embed
1041         {
1042             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1043             'info_dict': {
1044                 'id': 'ln7x1qSThw4k',
1045                 'ext': 'flv',
1046                 'title': "PFT Live: New leader in the 'new-look' defense",
1047                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1048                 'uploader': 'NBCU-SPORTS',
1049                 'upload_date': '20140107',
1050                 'timestamp': 1389118457,
1051             },
1052         },
1053         # NBC News embed
1054         {
1055             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1056             'md5': '1aa589c675898ae6d37a17913cf68d66',
1057             'info_dict': {
1058                 'id': '701714499682',
1059                 'ext': 'mp4',
1060                 'title': 'PREVIEW: On Assignment: David Letterman',
1061                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1062             },
1063         },
1064         # UDN embed
1065         {
1066             'url': 'https://video.udn.com/news/300346',
1067             'md5': 'fd2060e988c326991037b9aff9df21a6',
1068             'info_dict': {
1069                 'id': '300346',
1070                 'ext': 'mp4',
1071                 'title': '中一中男師變性 全校師生力挺',
1072                 'thumbnail': 're:^https?://.*\.jpg$',
1073             },
1074             'params': {
1075                 # m3u8 download
1076                 'skip_download': True,
1077             },
1078         },
1079         # Ooyala embed
1080         {
1081             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1082             'info_dict': {
1083                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1084                 'ext': 'mp4',
1085                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1086                 'title': 'This is what separates the Excel masters from the wannabes',
1087                 'duration': 191.933,
1088             },
1089             'params': {
1090                 # m3u8 downloads
1091                 'skip_download': True,
1092             }
1093         },
1094         # Brightcove URL in single quotes
1095         {
1096             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1097             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1098             'info_dict': {
1099                 'id': '4255764656001',
1100                 'ext': 'mp4',
1101                 'title': 'SN Presents: Russell Martin, World Citizen',
1102                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1103                 'uploader': 'Rogers Sportsnet',
1104                 'uploader_id': '1704050871',
1105                 'upload_date': '20150525',
1106                 'timestamp': 1432570283,
1107             },
1108         },
1109         # Dailymotion Cloud video
1110         {
1111             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1112             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1113             'info_dict': {
1114                 'id': 'x2uy8t3',
1115                 'ext': 'mp4',
1116                 'title': 'Sauvons les abeilles ! - Le débat',
1117                 'description': 'md5:d9082128b1c5277987825d684939ca26',
1118                 'thumbnail': 're:^https?://.*\.jpe?g$',
1119                 'timestamp': 1434970506,
1120                 'upload_date': '20150622',
1121                 'uploader': 'Public Sénat',
1122                 'uploader_id': 'xa9gza',
1123             }
1124         },
1125         # OnionStudios embed
1126         {
1127             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1128             'info_dict': {
1129                 'id': '2855',
1130                 'ext': 'mp4',
1131                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1132                 'thumbnail': 're:^https?://.*\.jpe?g$',
1133                 'uploader': 'ClickHole',
1134                 'uploader_id': 'clickhole',
1135             }
1136         },
1137         # SnagFilms embed
1138         {
1139             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1140             'info_dict': {
1141                 'id': '74849a00-85a9-11e1-9660-123139220831',
1142                 'ext': 'mp4',
1143                 'title': '#whilewewatch',
1144             }
1145         },
1146         # AdobeTVVideo embed
1147         {
1148             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1149             'md5': '43662b577c018ad707a63766462b1e87',
1150             'info_dict': {
1151                 'id': '2456',
1152                 'ext': 'mp4',
1153                 'title': 'New experience with Acrobat DC',
1154                 'description': 'New experience with Acrobat DC',
1155                 'duration': 248.667,
1156             },
1157         },
1158         # ScreenwaveMedia embed
1159         {
1160             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1161             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1162             'info_dict': {
1163                 'id': 'cinemasnob-55d26273809dd',
1164                 'ext': 'mp4',
1165                 'title': 'cinemasnob',
1166             },
1167         },
1168         # BrightcoveInPageEmbed embed
1169         {
1170             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1171             'info_dict': {
1172                 'id': '4238694884001',
1173                 'ext': 'flv',
1174                 'title': 'Tabletop: Dread, Last Thoughts',
1175                 'description': 'Tabletop: Dread, Last Thoughts',
1176                 'duration': 51690,
1177             },
1178         },
1179         # JWPlayer with M3U8
1180         {
1181             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1182             'info_dict': {
1183                 'id': 'playlist',
1184                 'ext': 'mp4',
1185                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1186                 'uploader': 'ren.tv',
1187             },
1188             'params': {
1189                 # m3u8 downloads
1190                 'skip_download': True,
1191             }
1192         },
1193         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1194         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1195         {
1196             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1197             'info_dict': {
1198                 'id': '4785848093001',
1199                 'ext': 'mp4',
1200                 'title': 'The Cardinal Pell Interview',
1201                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1202                 'uploader': 'GlobeCast Australia - GlobeStream',
1203                 'uploader_id': '2733773828001',
1204                 'upload_date': '20160304',
1205                 'timestamp': 1457083087,
1206             },
1207             'params': {
1208                 # m3u8 downloads
1209                 'skip_download': True,
1210             },
1211         },
1212         # Another form of arte.tv embed
1213         {
1214             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1215             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1216             'info_dict': {
1217                 'id': '030273-562_PLUS7-F',
1218                 'ext': 'mp4',
1219                 'title': 'ARTE Reportage - Nulle part, en France',
1220                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1221                 'upload_date': '20160409',
1222             },
1223         },
1224         # LiveLeak embed
1225         {
1226             'url': 'http://www.wykop.pl/link/3088787/',
1227             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1228             'info_dict': {
1229                 'id': '874_1459135191',
1230                 'ext': 'mp4',
1231                 'title': 'Man shows poor quality of new apartment building',
1232                 'description': 'The wall is like a sand pile.',
1233                 'uploader': 'Lake8737',
1234             }
1235         },
1236         # Duplicated embedded video URLs
1237         {
1238             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1239             'info_dict': {
1240                 'id': '149298443_480_16c25b74_2',
1241                 'ext': 'mp4',
1242                 'title': 'vs. Blue Orange Spring Game',
1243                 'uploader': 'www.hudl.com',
1244             },
1245         },
1246     ]
1247
1248     def report_following_redirect(self, new_url):
1249         """Report information extraction."""
1250         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1251
1252     def _extract_rss(self, url, video_id, doc):
1253         playlist_title = doc.find('./channel/title').text
1254         playlist_desc_el = doc.find('./channel/description')
1255         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1256
1257         entries = []
1258         for it in doc.findall('./channel/item'):
1259             next_url = xpath_text(it, 'link', fatal=False)
1260             if not next_url:
1261                 enclosure_nodes = it.findall('./enclosure')
1262                 for e in enclosure_nodes:
1263                     next_url = e.attrib.get('url')
1264                     if next_url:
1265                         break
1266
1267             if not next_url:
1268                 continue
1269
1270             entries.append({
1271                 '_type': 'url',
1272                 'url': next_url,
1273                 'title': it.find('title').text,
1274             })
1275
1276         return {
1277             '_type': 'playlist',
1278             'id': url,
1279             'title': playlist_title,
1280             'description': playlist_desc,
1281             'entries': entries,
1282         }
1283
1284     def _extract_camtasia(self, url, video_id, webpage):
1285         """ Returns None if no camtasia video can be found. """
1286
1287         camtasia_cfg = self._search_regex(
1288             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1289             webpage, 'camtasia configuration file', default=None)
1290         if camtasia_cfg is None:
1291             return None
1292
1293         title = self._html_search_meta('DC.title', webpage, fatal=True)
1294
1295         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1296         camtasia_cfg = self._download_xml(
1297             camtasia_url, video_id,
1298             note='Downloading camtasia configuration',
1299             errnote='Failed to download camtasia configuration')
1300         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1301
1302         entries = []
1303         for n in fileset_node.getchildren():
1304             url_n = n.find('./uri')
1305             if url_n is None:
1306                 continue
1307
1308             entries.append({
1309                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1310                 'title': '%s - %s' % (title, n.tag),
1311                 'url': compat_urlparse.urljoin(url, url_n.text),
1312                 'duration': float_or_none(n.find('./duration').text),
1313             })
1314
1315         return {
1316             '_type': 'playlist',
1317             'entries': entries,
1318             'title': title,
1319         }
1320
1321     def _real_extract(self, url):
1322         if url.startswith('//'):
1323             return {
1324                 '_type': 'url',
1325                 'url': self.http_scheme() + url,
1326             }
1327
1328         parsed_url = compat_urlparse.urlparse(url)
1329         if not parsed_url.scheme:
1330             default_search = self._downloader.params.get('default_search')
1331             if default_search is None:
1332                 default_search = 'fixup_error'
1333
1334             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1335                 if '/' in url:
1336                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1337                     return self.url_result('http://' + url)
1338                 elif default_search != 'fixup_error':
1339                     if default_search == 'auto_warning':
1340                         if re.match(r'^(?:url|URL)$', url):
1341                             raise ExtractorError(
1342                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1343                                 expected=True)
1344                         else:
1345                             self._downloader.report_warning(
1346                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1347                     return self.url_result('ytsearch:' + url)
1348
1349             if default_search in ('error', 'fixup_error'):
1350                 raise ExtractorError(
1351                     '%r is not a valid URL. '
1352                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1353                     % (url, url), expected=True)
1354             else:
1355                 if ':' not in default_search:
1356                     default_search += ':'
1357                 return self.url_result(default_search + url)
1358
1359         url, smuggled_data = unsmuggle_url(url)
1360         force_videoid = None
1361         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1362         if smuggled_data and 'force_videoid' in smuggled_data:
1363             force_videoid = smuggled_data['force_videoid']
1364             video_id = force_videoid
1365         else:
1366             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1367
1368         self.to_screen('%s: Requesting header' % video_id)
1369
1370         head_req = HEADRequest(url)
1371         head_response = self._request_webpage(
1372             head_req, video_id,
1373             note=False, errnote='Could not send HEAD request to %s' % url,
1374             fatal=False)
1375
1376         if head_response is not False:
1377             # Check for redirect
1378             new_url = head_response.geturl()
1379             if url != new_url:
1380                 self.report_following_redirect(new_url)
1381                 if force_videoid:
1382                     new_url = smuggle_url(
1383                         new_url, {'force_videoid': force_videoid})
1384                 return self.url_result(new_url)
1385
1386         full_response = None
1387         if head_response is False:
1388             request = sanitized_Request(url)
1389             request.add_header('Accept-Encoding', '*')
1390             full_response = self._request_webpage(request, video_id)
1391             head_response = full_response
1392
1393         info_dict = {
1394             'id': video_id,
1395             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1396             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1397         }
1398
1399         # Check for direct link to a video
1400         content_type = head_response.headers.get('Content-Type', '').lower()
1401         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1402         if m:
1403             format_id = m.group('format_id')
1404             if format_id.endswith('mpegurl'):
1405                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1406             elif format_id == 'f4m':
1407                 formats = self._extract_f4m_formats(url, video_id)
1408             else:
1409                 formats = [{
1410                     'format_id': m.group('format_id'),
1411                     'url': url,
1412                     'vcodec': 'none' if m.group('type') == 'audio' else None
1413                 }]
1414                 info_dict['direct'] = True
1415             self._sort_formats(formats)
1416             info_dict['formats'] = formats
1417             return info_dict
1418
1419         if not self._downloader.params.get('test', False) and not is_intentional:
1420             force = self._downloader.params.get('force_generic_extractor', False)
1421             self._downloader.report_warning(
1422                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1423
1424         if not full_response:
1425             request = sanitized_Request(url)
1426             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1427             # making it impossible to download only chunk of the file (yet we need only 512kB to
1428             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1429             # that will always result in downloading the whole file that is not desirable.
1430             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1431             # to accept raw bytes and being able to download only a chunk.
1432             # It may probably better to solve this by checking Content-Type for application/octet-stream
1433             # after HEAD request finishes, but not sure if we can rely on this.
1434             request.add_header('Accept-Encoding', '*')
1435             full_response = self._request_webpage(request, video_id)
1436
1437         first_bytes = full_response.read(512)
1438
1439         # Is it an M3U playlist?
1440         if first_bytes.startswith(b'#EXTM3U'):
1441             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1442             self._sort_formats(info_dict['formats'])
1443             return info_dict
1444
1445         # Maybe it's a direct link to a video?
1446         # Be careful not to download the whole thing!
1447         if not is_html(first_bytes):
1448             self._downloader.report_warning(
1449                 'URL could be a direct video link, returning it as such.')
1450             info_dict.update({
1451                 'direct': True,
1452                 'url': url,
1453             })
1454             return info_dict
1455
1456         webpage = self._webpage_read_content(
1457             full_response, url, video_id, prefix=first_bytes)
1458
1459         self.report_extraction(video_id)
1460
1461         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1462         try:
1463             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1464             if doc.tag == 'rss':
1465                 return self._extract_rss(url, video_id, doc)
1466             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1467                 smil = self._parse_smil(doc, url, video_id)
1468                 self._sort_formats(smil['formats'])
1469                 return smil
1470             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1471                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1472             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1473                 info_dict['formats'] = self._parse_mpd_formats(
1474                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1475                 self._sort_formats(info_dict['formats'])
1476                 return info_dict
1477             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1478                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1479                 self._sort_formats(info_dict['formats'])
1480                 return info_dict
1481         except compat_xml_parse_error:
1482             pass
1483
1484         # Is it a Camtasia project?
1485         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1486         if camtasia_res is not None:
1487             return camtasia_res
1488
1489         # Sometimes embedded video player is hidden behind percent encoding
1490         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
        # Unescaping the whole page allows handling those cases in a generic way
1492         webpage = compat_urllib_parse_unquote(webpage)
1493
1494         # it's tempting to parse this further, but you would
1495         # have to take into account all the variations like
1496         #   Video Title - Site Name
1497         #   Site Name | Video Title
1498         #   Video Title - Tagline | Site Name
1499         # and so on and so forth; it's just not practical
1500         video_title = self._og_search_title(
1501             webpage, default=None) or self._html_search_regex(
1502             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1503             default='video')
1504
1505         # Try to detect age limit automatically
1506         age_limit = self._rta_search(webpage)
1507         # And then there are the jokers who advertise that they use RTA,
1508         # but actually don't.
1509         AGE_LIMIT_MARKERS = [
1510             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1511         ]
1512         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1513             age_limit = 18
1514
1515         # video uploader is domain name
1516         video_uploader = self._search_regex(
1517             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1518
1519         video_description = self._og_search_description(webpage, default=None)
1520         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1521
1522         # Helper method
1523         def _playlist_from_matches(matches, getter=None, ie=None):
1524             urlrs = orderedSet(
1525                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1526                 for m in matches)
1527             return self.playlist_result(
1528                 urlrs, playlist_id=video_id, playlist_title=video_title)
1529
1530         # Look for Brightcove Legacy Studio embeds
1531         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1532         if bc_urls:
1533             self.to_screen('Brightcove video detected.')
1534             entries = [{
1535                 '_type': 'url',
1536                 'url': smuggle_url(bc_url, {'Referer': url}),
1537                 'ie_key': 'BrightcoveLegacy'
1538             } for bc_url in bc_urls]
1539
1540             return {
1541                 '_type': 'playlist',
1542                 'title': video_title,
1543                 'id': video_id,
1544                 'entries': entries,
1545             }
1546
1547         # Look for Brightcove New Studio embeds
1548         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1549         if bc_urls:
1550             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1551
1552         # Look for ThePlatform embeds
1553         tp_urls = ThePlatformIE._extract_urls(webpage)
1554         if tp_urls:
1555             return _playlist_from_matches(tp_urls, ie='ThePlatform')
1556
1557         # Look for Vessel embeds
1558         vessel_urls = VesselIE._extract_urls(webpage)
1559         if vessel_urls:
1560             return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
1561
1562         # Look for embedded rtl.nl player
1563         matches = re.findall(
1564             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1565             webpage)
1566         if matches:
1567             return _playlist_from_matches(matches, ie='RtlNl')
1568
1569         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1570         if vimeo_url is not None:
1571             return self.url_result(vimeo_url)
1572
1573         vid_me_embed_url = self._search_regex(
1574             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1575             webpage, 'vid.me embed', default=None)
1576         if vid_me_embed_url is not None:
1577             return self.url_result(vid_me_embed_url, 'Vidme')
1578
1579         # Look for embedded YouTube player
1580         matches = re.findall(r'''(?x)
1581             (?:
1582                 <iframe[^>]+?src=|
1583                 data-video-url=|
1584                 <embed[^>]+?src=|
1585                 embedSWF\(?:\s*|
1586                 new\s+SWFObject\(
1587             )
1588             (["\'])
1589                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1590                 (?:embed|v|p)/.+?)
1591             \1''', webpage)
1592         if matches:
1593             return _playlist_from_matches(
1594                 matches, lambda m: unescapeHTML(m[1]))
1595
1596         # Look for lazyYT YouTube embed
1597         matches = re.findall(
1598             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1599         if matches:
1600             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1601
1602         # Look for embedded Dailymotion player
1603         matches = re.findall(
1604             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1605         if matches:
1606             return _playlist_from_matches(
1607                 matches, lambda m: unescapeHTML(m[1]))
1608
1609         # Look for embedded Dailymotion playlist player (#3822)
1610         m = re.search(
1611             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1612         if m:
1613             playlists = re.findall(
1614                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1615             if playlists:
1616                 return _playlist_from_matches(
1617                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1618
1619         # Look for embedded Wistia player
1620         match = re.search(
1621             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1622         if match:
1623             embed_url = self._proto_relative_url(
1624                 unescapeHTML(match.group('url')))
1625             return {
1626                 '_type': 'url_transparent',
1627                 'url': embed_url,
1628                 'ie_key': 'Wistia',
1629                 'uploader': video_uploader,
1630             }
1631
1632         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1633         if match:
1634             return {
1635                 '_type': 'url_transparent',
1636                 'url': 'wistia:%s' % match.group('id'),
1637                 'ie_key': 'Wistia',
1638                 'uploader': video_uploader,
1639             }
1640
1641         match = re.search(
1642             r'''(?sx)
1643                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1644                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1645             ''', webpage)
1646         if match:
1647             return self.url_result(self._proto_relative_url(
1648                 'wistia:%s' % match.group('id')), 'Wistia')
1649
1650         # Look for SVT player
1651         svt_url = SVTIE._extract_url(webpage)
1652         if svt_url:
1653             return self.url_result(svt_url, 'SVT')
1654
1655         # Look for embedded condenast player
1656         matches = re.findall(
1657             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1658             webpage)
1659         if matches:
1660             return {
1661                 '_type': 'playlist',
1662                 'entries': [{
1663                     '_type': 'url',
1664                     'ie_key': 'CondeNast',
1665                     'url': ma,
1666                 } for ma in matches],
1667                 'title': video_title,
1668                 'id': video_id,
1669             }
1670
1671         # Look for Bandcamp pages with custom domain
1672         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1673         if mobj is not None:
1674             burl = unescapeHTML(mobj.group(1))
1675             # Don't set the extractor because it can be a track url or an album
1676             return self.url_result(burl)
1677
1678         # Look for embedded Vevo player
1679         mobj = re.search(
1680             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1681         if mobj is not None:
1682             return self.url_result(mobj.group('url'))
1683
1684         # Look for embedded Viddler player
1685         mobj = re.search(
1686             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1687             webpage)
1688         if mobj is not None:
1689             return self.url_result(mobj.group('url'))
1690
1691         # Look for NYTimes player
1692         mobj = re.search(
1693             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1694             webpage)
1695         if mobj is not None:
1696             return self.url_result(mobj.group('url'))
1697
1698         # Look for Libsyn player
1699         mobj = re.search(
1700             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1701         if mobj is not None:
1702             return self.url_result(mobj.group('url'))
1703
1704         # Look for Ooyala videos
1705         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1706                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1707                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1708                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1709         if mobj is not None:
1710             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1711
1712         # Look for multiple Ooyala embeds on SBN network websites
1713         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1714         if mobj is not None:
1715             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1716             if embeds:
1717                 return _playlist_from_matches(
1718                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1719
1720         # Look for Aparat videos
1721         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1722         if mobj is not None:
1723             return self.url_result(mobj.group(1), 'Aparat')
1724
1725         # Look for MPORA videos
1726         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1727         if mobj is not None:
1728             return self.url_result(mobj.group(1), 'Mpora')
1729
1730         # Look for embedded NovaMov-based player
1731         mobj = re.search(
1732             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1733                     (?P<url>http://(?:(?:embed|www)\.)?
1734                         (?:novamov\.com|
1735                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1736                            videoweed\.(?:es|com)|
1737                            movshare\.(?:net|sx|ag)|
1738                            divxstage\.(?:eu|net|ch|co|at|ag))
1739                         /embed\.php.+?)\1''', webpage)
1740         if mobj is not None:
1741             return self.url_result(mobj.group('url'))
1742
1743         # Look for embedded Facebook player
1744         mobj = re.search(
1745             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1746         if mobj is not None:
1747             return self.url_result(mobj.group('url'), 'Facebook')
1748
1749         # Look for embedded VK player
1750         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1751         if mobj is not None:
1752             return self.url_result(mobj.group('url'), 'VK')
1753
1754         # Look for embedded Odnoklassniki player
1755         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1756         if mobj is not None:
1757             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1758
1759         # Look for embedded ivi player
1760         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1761         if mobj is not None:
1762             return self.url_result(mobj.group('url'), 'Ivi')
1763
1764         # Look for embedded Huffington Post player
1765         mobj = re.search(
1766             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1767         if mobj is not None:
1768             return self.url_result(mobj.group('url'), 'HuffPost')
1769
1770         # Look for embed.ly
1771         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1772         if mobj is not None:
1773             return self.url_result(mobj.group('url'))
1774         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1775         if mobj is not None:
1776             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1777
1778         # Look for funnyordie embed
1779         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1780         if matches:
1781             return _playlist_from_matches(
1782                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1783
1784         # Look for BBC iPlayer embed
1785         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1786         if matches:
1787             return _playlist_from_matches(matches, ie='BBCCoUk')
1788
1789         # Look for embedded RUTV player
1790         rutv_url = RUTVIE._extract_url(webpage)
1791         if rutv_url:
1792             return self.url_result(rutv_url, 'RUTV')
1793
1794         # Look for embedded TVC player
1795         tvc_url = TVCIE._extract_url(webpage)
1796         if tvc_url:
1797             return self.url_result(tvc_url, 'TVC')
1798
1799         # Look for embedded SportBox player
1800         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1801         if sportbox_urls:
1802             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1803
1804         # Look for embedded PornHub player
1805         pornhub_url = PornHubIE._extract_url(webpage)
1806         if pornhub_url:
1807             return self.url_result(pornhub_url, 'PornHub')
1808
1809         # Look for embedded XHamster player
1810         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1811         if xhamster_urls:
1812             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1813
1814         # Look for embedded TNAFlixNetwork player
1815         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1816         if tnaflix_urls:
1817             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1818
1819         # Look for embedded Tvigle player
1820         mobj = re.search(
1821             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1822         if mobj is not None:
1823             return self.url_result(mobj.group('url'), 'Tvigle')
1824
1825         # Look for embedded TED player
1826         mobj = re.search(
1827             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1828         if mobj is not None:
1829             return self.url_result(mobj.group('url'), 'TED')
1830
1831         # Look for embedded Ustream videos
1832         mobj = re.search(
1833             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1834         if mobj is not None:
1835             return self.url_result(mobj.group('url'), 'Ustream')
1836
1837         # Look for embedded arte.tv player
1838         mobj = re.search(
1839             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
1840             webpage)
1841         if mobj is not None:
1842             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1843
1844         # Look for embedded francetv player
1845         mobj = re.search(
1846             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1847             webpage)
1848         if mobj is not None:
1849             return self.url_result(mobj.group('url'))
1850
1851         # Look for embedded smotri.com player
1852         smotri_url = SmotriIE._extract_url(webpage)
1853         if smotri_url:
1854             return self.url_result(smotri_url, 'Smotri')
1855
1856         # Look for embedded Myvi.ru player
1857         myvi_url = MyviIE._extract_url(webpage)
1858         if myvi_url:
1859             return self.url_result(myvi_url)
1860
1861         # Look for embedded soundcloud player
1862         mobj = re.search(
1863             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1864             webpage)
1865         if mobj is not None:
1866             url = unescapeHTML(mobj.group('url'))
1867             return self.url_result(url)
1868
1869         # Look for embedded mtvservices player
1870         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1871         if mtvservices_url:
1872             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1873
1874         # Look for embedded yahoo player
1875         mobj = re.search(
1876             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1877             webpage)
1878         if mobj is not None:
1879             return self.url_result(mobj.group('url'), 'Yahoo')
1880
1881         # Look for embedded sbs.com.au player
1882         mobj = re.search(
1883             r'''(?x)
1884             (?:
1885                 <meta\s+property="og:video"\s+content=|
1886                 <iframe[^>]+?src=
1887             )
1888             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1889             webpage)
1890         if mobj is not None:
1891             return self.url_result(mobj.group('url'), 'SBS')
1892
1893         # Look for embedded Cinchcast player
1894         mobj = re.search(
1895             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1896             webpage)
1897         if mobj is not None:
1898             return self.url_result(mobj.group('url'), 'Cinchcast')
1899
1900         mobj = re.search(
1901             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1902             webpage)
1903         if not mobj:
1904             mobj = re.search(
1905                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1906                 webpage)
1907         if mobj is not None:
1908             return self.url_result(mobj.group('url'), 'MLB')
1909
1910         mobj = re.search(
1911             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1912             webpage)
1913         if mobj is not None:
1914             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1915
1916         mobj = re.search(
1917             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
1918             webpage)
1919         if mobj is not None:
1920             return self.url_result(mobj.group('url'), 'Livestream')
1921
1922         # Look for Zapiks embed
1923         mobj = re.search(
1924             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1925         if mobj is not None:
1926             return self.url_result(mobj.group('url'), 'Zapiks')
1927
1928         # Look for Kaltura embeds
1929         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
1930                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?(?P<q2>["\'])?entry_?[Ii]d(?P=q2)\s*:\s*(?P<q3>["\'])(?P<id>.+?)(?P=q3)', webpage))
1931         if mobj is not None:
1932             return self.url_result(smuggle_url(
1933                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1934                 {'source_url': url}), 'Kaltura')
1935
1936         # Look for Eagle.Platform embeds
1937         mobj = re.search(
1938             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1939         if mobj is not None:
1940             return self.url_result(mobj.group('url'), 'EaglePlatform')
1941
1942         # Look for ClipYou (uses Eagle.Platform) embeds
1943         mobj = re.search(
1944             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1945         if mobj is not None:
1946             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1947
1948         # Look for Pladform embeds
1949         pladform_url = PladformIE._extract_url(webpage)
1950         if pladform_url:
1951             return self.url_result(pladform_url)
1952
1953         # Look for Videomore embeds
1954         videomore_url = VideomoreIE._extract_url(webpage)
1955         if videomore_url:
1956             return self.url_result(videomore_url)
1957
1958         # Look for Playwire embeds
1959         mobj = re.search(
1960             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1961         if mobj is not None:
1962             return self.url_result(mobj.group('url'))
1963
1964         # Look for 5min embeds
1965         mobj = re.search(
1966             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1967         if mobj is not None:
1968             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1969
1970         # Look for Crooks and Liars embeds
1971         mobj = re.search(
1972             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1973         if mobj is not None:
1974             return self.url_result(mobj.group('url'))
1975
1976         # Look for NBC Sports VPlayer embeds
1977         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1978         if nbc_sports_url:
1979             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1980
1981         # Look for NBC News embeds
1982         nbc_news_embed_url = re.search(
1983             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
1984         if nbc_news_embed_url:
1985             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
1986
1987         # Look for Google Drive embeds
1988         google_drive_url = GoogleDriveIE._extract_url(webpage)
1989         if google_drive_url:
1990             return self.url_result(google_drive_url, 'GoogleDrive')
1991
1992         # Look for UDN embeds
1993         mobj = re.search(
1994             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1995         if mobj is not None:
1996             return self.url_result(
1997                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1998
1999         # Look for Senate ISVP iframe
2000         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2001         if senate_isvp_url:
2002             return self.url_result(senate_isvp_url, 'SenateISVP')
2003
2004         # Look for Dailymotion Cloud videos
2005         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2006         if dmcloud_url:
2007             return self.url_result(dmcloud_url, 'DailymotionCloud')
2008
2009         # Look for OnionStudios embeds
2010         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2011         if onionstudios_url:
2012             return self.url_result(onionstudios_url)
2013
2014         # Look for ViewLift embeds
2015         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2016         if viewlift_url:
2017             return self.url_result(viewlift_url)
2018
2019         # Look for JWPlatform embeds
2020         jwplatform_url = JWPlatformIE._extract_url(webpage)
2021         if jwplatform_url:
2022             return self.url_result(jwplatform_url, 'JWPlatform')
2023
2024         # Look for ScreenwaveMedia embeds
2025         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
2026         if mobj is not None:
2027             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
2028
2029         # Look for Digiteka embeds
2030         digiteka_url = DigitekaIE._extract_url(webpage)
2031         if digiteka_url:
2032             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2033
2034         # Look for Limelight embeds
2035         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2036         if mobj:
2037             lm = {
2038                 'Media': 'media',
2039                 'Channel': 'channel',
2040                 'ChannelList': 'channel_list',
2041             }
2042             return self.url_result('limelight:%s:%s' % (
2043                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
2044
2045         # Look for AdobeTVVideo embeds
2046         mobj = re.search(
2047             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2048             webpage)
2049         if mobj is not None:
2050             return self.url_result(
2051                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2052                 'AdobeTVVideo')
2053
2054         # Look for Vine embeds
2055         mobj = re.search(
2056             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2057             webpage)
2058         if mobj is not None:
2059             return self.url_result(
2060                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2061
2062         # Look for Instagram embeds
2063         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2064         if instagram_embed_url is not None:
2065             return self.url_result(
2066                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2067
2068         # Look for LiveLeak embeds
2069         liveleak_url = LiveLeakIE._extract_url(webpage)
2070         if liveleak_url:
2071             return self.url_result(liveleak_url, 'LiveLeak')
2072
2073         # Look for 3Q SDN embeds
2074         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2075         if threeqsdn_url:
2076             return {
2077                 '_type': 'url_transparent',
2078                 'ie_key': ThreeQSDNIE.ie_key(),
2079                 'url': self._proto_relative_url(threeqsdn_url),
2080                 'title': video_title,
2081                 'description': video_description,
2082                 'thumbnail': video_thumbnail,
2083                 'uploader': video_uploader,
2084             }
2085
2086         def check_video(vurl):
2087             if YoutubeIE.suitable(vurl):
2088                 return True
2089             vpath = compat_urlparse.urlparse(vurl).path
2090             vext = determine_ext(vpath)
2091             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
2092
2093         def filter_video(urls):
2094             return list(filter(check_video, urls))
2095
2096         # Start with something easy: JW Player in SWFObject
2097         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2098         if not found:
2099             # Look for gorilla-vid style embedding
2100             found = filter_video(re.findall(r'''(?sx)
2101                 (?:
2102                     jw_plugins|
2103                     JWPlayerOptions|
2104                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2105                 )
2106                 .*?
2107                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2108         if not found:
2109             # Broaden the search a little bit
2110             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2111         if not found:
            # Broaden the search a little bit more: JWPlayer JS loader
2113             found = filter_video(re.findall(
2114                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2115         if not found:
2116             # Flow player
2117             found = filter_video(re.findall(r'''(?xs)
2118                 flowplayer\("[^"]+",\s*
2119                     \{[^}]+?\}\s*,
2120                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2121                         ["']?url["']?\s*:\s*["']([^"']+)["']
2122             ''', webpage))
2123         if not found:
2124             # Cinerama player
2125             found = re.findall(
2126                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2127         if not found:
2128             # Try to find twitter cards info
2129             found = filter_video(re.findall(
2130                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2131         if not found:
2132             # We look for Open Graph info:
2133             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
2134             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2135             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2136             if m_video_type is not None:
2137                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2138         if not found:
2139             # HTML5 video
2140             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
2141         if not found:
2142             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2143             found = re.search(
2144                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2145                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2146                 webpage)
2147             if not found:
2148                 # Look also in Refresh HTTP header
2149                 refresh_header = head_response.headers.get('Refresh')
2150                 if refresh_header:
2151                     # In python 2 response HTTP headers are bytestrings
2152                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2153                         refresh_header = refresh_header.decode('iso-8859-1')
2154                     found = re.search(REDIRECT_REGEX, refresh_header)
2155             if found:
2156                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2157                 self.report_following_redirect(new_url)
2158                 return {
2159                     '_type': 'url',
2160                     'url': new_url,
2161                 }
2162         if not found:
2163             raise UnsupportedError(url)
2164
2165         entries = []
2166         for video_url in orderedSet(found):
2167             video_url = unescapeHTML(video_url)
2168             video_url = video_url.replace('\\/', '/')
2169             video_url = compat_urlparse.urljoin(url, video_url)
2170             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2171
2172             # Sometimes, jwplayer extraction will result in a YouTube URL
2173             if YoutubeIE.suitable(video_url):
2174                 entries.append(self.url_result(video_url, 'Youtube'))
2175                 continue
2176
2177             # here's a fun little line of code for you:
2178             video_id = os.path.splitext(video_id)[0]
2179
2180             entry_info_dict = {
2181                 'id': video_id,
2182                 'uploader': video_uploader,
2183                 'title': video_title,
2184                 'age_limit': age_limit,
2185             }
2186
2187             ext = determine_ext(video_url)
2188             if ext == 'smil':
2189                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2190             elif ext == 'xspf':
2191                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2192             elif ext == 'm3u8':
2193                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2194             elif ext == 'mpd':
2195                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2196             elif ext == 'f4m':
2197                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2198             else:
2199                 entry_info_dict['url'] = video_url
2200
2201             if entry_info_dict.get('formats'):
2202                 self._sort_formats(entry_info_dict['formats'])
2203
2204             entries.append(entry_info_dict)
2205
2206         if len(entries) == 1:
2207             return entries[0]
2208         else:
2209             for num, e in enumerate(entries, start=1):
2210                 # 'url' results don't have a title
2211                 if e.get('title') is not None:
2212                     e['title'] = '%s (%d)' % (e['title'], num)
2213             return {
2214                 '_type': 'playlist',
2215                 'entries': entries,
2216             }