[generic] Update the UDNEmbed test case
[youtube-dl] / youtube_dl / extractor / generic.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     orderedSet,
24     sanitized_Request,
25     smuggle_url,
26     unescapeHTML,
27     unified_strdate,
28     unsmuggle_url,
29     UnsupportedError,
30     url_basename,
31     xpath_text,
32 )
33 from .brightcove import (
34     BrightcoveLegacyIE,
35     BrightcoveNewIE,
36 )
37 from .nbc import NBCSportsVPlayerIE
38 from .ooyala import OoyalaIE
39 from .rutv import RUTVIE
40 from .tvc import TVCIE
41 from .sportbox import SportBoxEmbedIE
42 from .smotri import SmotriIE
43 from .myvi import MyviIE
44 from .condenast import CondeNastIE
45 from .udn import UDNEmbedIE
46 from .senateisvp import SenateISVPIE
47 from .svt import SVTIE
48 from .pornhub import PornHubIE
49 from .xhamster import XHamsterEmbedIE
50 from .tnaflix import TNAFlixNetworkEmbedIE
51 from .vimeo import VimeoIE
52 from .dailymotion import DailymotionCloudIE
53 from .onionstudios import OnionStudiosIE
54 from .viewlift import ViewLiftEmbedIE
55 from .screenwavemedia import ScreenwaveMediaIE
56 from .mtv import MTVServicesEmbeddedIE
57 from .pladform import PladformIE
58 from .videomore import VideomoreIE
59 from .googledrive import GoogleDriveIE
60 from .jwplatform import JWPlatformIE
61 from .digiteka import DigitekaIE
62 from .instagram import InstagramIE
63 from .liveleak import LiveLeakIE
64 from .threeqsdn import ThreeQSDNIE
65 from .theplatform import ThePlatformIE
66
67
68 class GenericIE(InfoExtractor):
69     IE_DESC = 'Generic downloader that works on some sites'
70     _VALID_URL = r'.*'
71     IE_NAME = 'generic'
72     _TESTS = [
73         # Direct link to a video
74         {
75             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
76             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
77             'info_dict': {
78                 'id': 'trailer',
79                 'ext': 'mp4',
80                 'title': 'trailer',
81                 'upload_date': '20100513',
82             }
83         },
84         # Direct link to media delivered compressed (until Accept-Encoding is *)
85         {
86             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
87             'md5': '128c42e68b13950268b648275386fc74',
88             'info_dict': {
89                 'id': 'FictionJunction-Parallel_Hearts',
90                 'ext': 'flac',
91                 'title': 'FictionJunction-Parallel_Hearts',
92                 'upload_date': '20140522',
93             },
94             'expected_warnings': [
95                 'URL could be a direct video link, returning it as such.'
96             ]
97         },
98         # Direct download with broken HEAD
99         {
100             'url': 'http://ai-radio.org:8000/radio.opus',
101             'info_dict': {
102                 'id': 'radio',
103                 'ext': 'opus',
104                 'title': 'radio',
105             },
106             'params': {
107                 'skip_download': True,  # infinite live stream
108             },
109             'expected_warnings': [
110                 r'501.*Not Implemented',
111                 r'400.*Bad Request',
112             ],
113         },
114         # Direct link with incorrect MIME type
115         {
116             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
117             'md5': '4ccbebe5f36706d85221f204d7eb5913',
118             'info_dict': {
119                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
120                 'id': '5_Lennart_Poettering_-_Systemd',
121                 'ext': 'webm',
122                 'title': '5_Lennart_Poettering_-_Systemd',
123                 'upload_date': '20141120',
124             },
125             'expected_warnings': [
126                 'URL could be a direct video link, returning it as such.'
127             ]
128         },
129         # RSS feed
130         {
131             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
132             'info_dict': {
133                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
134                 'title': 'Zero Punctuation',
135                 'description': 're:.*groundbreaking video review series.*'
136             },
137             'playlist_mincount': 11,
138         },
139         # RSS feed with enclosure
140         {
141             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
142             'info_dict': {
143                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
144                 'ext': 'm4v',
145                 'upload_date': '20150228',
146                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
147             }
148         },
149         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
150         {
151             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
152             'info_dict': {
153                 'id': 'smil',
154                 'ext': 'mp4',
155                 'title': 'Automatics, robotics and biocybernetics',
156                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
157                 'upload_date': '20130627',
158                 'formats': 'mincount:16',
159                 'subtitles': 'mincount:1',
160             },
161             'params': {
162                 'force_generic_extractor': True,
163                 'skip_download': True,
164             },
165         },
166         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
167         {
168             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
169             'info_dict': {
170                 'id': 'hds',
171                 'ext': 'flv',
172                 'title': 'hds',
173                 'formats': 'mincount:1',
174             },
175             'params': {
176                 'skip_download': True,
177             },
178         },
179         # SMIL from https://www.restudy.dk/video/play/id/1637
180         {
181             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
182             'info_dict': {
183                 'id': 'video_1637',
184                 'ext': 'flv',
185                 'title': 'video_1637',
186                 'formats': 'mincount:3',
187             },
188             'params': {
189                 'skip_download': True,
190             },
191         },
192         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
193         {
194             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
195             'info_dict': {
196                 'id': 'smil-service',
197                 'ext': 'flv',
198                 'title': 'smil-service',
199                 'formats': 'mincount:1',
200             },
201             'params': {
202                 'skip_download': True,
203             },
204         },
205         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
206         {
207             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
208             'info_dict': {
209                 'id': '4719370',
210                 'ext': 'mp4',
211                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
212                 'formats': 'mincount:3',
213             },
214             'params': {
215                 'skip_download': True,
216             },
217         },
218         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
219         {
220             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
221             'info_dict': {
222                 'id': 'mZlp2ctYIUEB',
223                 'ext': 'mp4',
224                 'title': 'Tikibad ontruimd wegens brand',
225                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
226                 'thumbnail': 're:^https?://.*\.jpg$',
227                 'duration': 33,
228             },
229             'params': {
230                 'skip_download': True,
231             },
232         },
233         # MPD from http://dash-mse-test.appspot.com/media.html
234         {
235             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
236             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
237             'info_dict': {
238                 'id': 'car-20120827-manifest',
239                 'ext': 'mp4',
240                 'title': 'car-20120827-manifest',
241                 'formats': 'mincount:9',
242                 'upload_date': '20130904',
243             },
244             'params': {
245                 'format': 'bestvideo',
246             },
247         },
248         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
249         {
250             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
251             'info_dict': {
252                 'id': 'content',
253                 'ext': 'mp4',
254                 'title': 'content',
255                 'formats': 'mincount:8',
256             },
257             'params': {
258                 # m3u8 downloads
259                 'skip_download': True,
260             }
261         },
262         # m3u8 served with Content-Type: text/plain
263         {
264             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
265             'info_dict': {
266                 'id': 'index',
267                 'ext': 'mp4',
268                 'title': 'index',
269                 'upload_date': '20140720',
270                 'formats': 'mincount:11',
271             },
272             'params': {
273                 # m3u8 downloads
274                 'skip_download': True,
275             }
276         },
277         # google redirect
278         {
279             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
280             'info_dict': {
281                 'id': 'cmQHVoWB5FY',
282                 'ext': 'mp4',
283                 'upload_date': '20130224',
284                 'uploader_id': 'TheVerge',
285                 'description': 're:^Chris Ziegler takes a look at the\.*',
286                 'uploader': 'The Verge',
287                 'title': 'First Firefox OS phones side-by-side',
288             },
289             'params': {
290                 'skip_download': False,
291             }
292         },
293         {
294             # redirect in Refresh HTTP header
295             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
296             'info_dict': {
297                 'id': 'pO8h3EaFRdo',
298                 'ext': 'mp4',
299                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
300                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
301                 'upload_date': '20150917',
302                 'uploader_id': 'brtvofficial',
303                 'uploader': 'Boiler Room',
304             },
305             'params': {
306                 'skip_download': False,
307             },
308         },
309         {
310             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
311             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
312             'info_dict': {
313                 'id': '13601338388002',
314                 'ext': 'mp4',
315                 'uploader': 'www.hodiho.fr',
316                 'title': 'R\u00e9gis plante sa Jeep',
317             }
318         },
319         # bandcamp page with custom domain
320         {
321             'add_ie': ['Bandcamp'],
322             'url': 'http://bronyrock.com/track/the-pony-mash',
323             'info_dict': {
324                 'id': '3235767654',
325                 'ext': 'mp3',
326                 'title': 'The Pony Mash',
327                 'uploader': 'M_Pallante',
328             },
329             'skip': 'There is a limit of 200 free downloads / month for the test song',
330         },
331         # embedded brightcove video
332         # it also tests brightcove videos that need to set the 'Referer' in the
333         # http requests
334         {
335             'add_ie': ['BrightcoveLegacy'],
336             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
337             'info_dict': {
338                 'id': '2765128793001',
339                 'ext': 'mp4',
340                 'title': 'Le cours de bourse : l’analyse technique',
341                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
342                 'uploader': 'BFM BUSINESS',
343             },
344             'params': {
345                 'skip_download': True,
346             },
347         },
348         {
349             # https://github.com/rg3/youtube-dl/issues/2253
350             'url': 'http://bcove.me/i6nfkrc3',
351             'md5': '0ba9446db037002366bab3b3eb30c88c',
352             'info_dict': {
353                 'id': '3101154703001',
354                 'ext': 'mp4',
355                 'title': 'Still no power',
356                 'uploader': 'thestar.com',
357                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
358             },
359             'add_ie': ['BrightcoveLegacy'],
360         },
361         {
362             'url': 'http://www.championat.com/video/football/v/87/87499.html',
363             'md5': 'fb973ecf6e4a78a67453647444222983',
364             'info_dict': {
365                 'id': '3414141473001',
366                 'ext': 'mp4',
367                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
368                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
369                 'uploader': 'Championat',
370             },
371         },
372         {
373             # https://github.com/rg3/youtube-dl/issues/3541
374             'add_ie': ['BrightcoveLegacy'],
375             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
376             'info_dict': {
377                 'id': '3866516442001',
378                 'ext': 'mp4',
379                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
380                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
381                 'uploader': 'SBS Broadcasting',
382             },
383             'skip': 'Restricted to Netherlands',
384             'params': {
385                 'skip_download': True,  # m3u8 download
386             },
387         },
388         # ooyala video
389         {
390             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
391             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
392             'info_dict': {
393                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
394                 'ext': 'mp4',
395                 'title': '2cc213299525360.mov',  # that's what we get
396                 'duration': 238.231,
397             },
398             'add_ie': ['Ooyala'],
399         },
400         {
401             # ooyala video embedded with http://player.ooyala.com/iframe.js
402             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
403             'info_dict': {
404                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
405                 'ext': 'mp4',
406                 'title': '"Steve Jobs: Man in the Machine" trailer',
407                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
408                 'duration': 135.427,
409             },
410             'params': {
411                 'skip_download': True,
412             },
413         },
414         # embed.ly video
415         {
416             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
417             'info_dict': {
418                 'id': '9ODmcdjQcHQ',
419                 'ext': 'mp4',
420                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
421                 'upload_date': '20140225',
422                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
423                 'uploader': 'Tested',
424                 'uploader_id': 'testedcom',
425             },
426             # No need to test YoutubeIE here
427             'params': {
428                 'skip_download': True,
429             },
430         },
431         # funnyordie embed
432         {
433             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
434             'info_dict': {
435                 'id': '18e820ec3f',
436                 'ext': 'mp4',
437                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
438                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
439             },
440         },
441         # RUTV embed
442         {
443             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
444             'info_dict': {
445                 'id': '776940',
446                 'ext': 'mp4',
447                 'title': 'Охотское море стало целиком российским',
448                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
449             },
450             'params': {
451                 # m3u8 download
452                 'skip_download': True,
453             },
454         },
455         # TVC embed
456         {
457             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
458             'info_dict': {
459                 'id': '55304',
460                 'ext': 'mp4',
461                 'title': 'Дошкольное воспитание',
462             },
463         },
464         # SportBox embed
465         {
466             'url': 'http://www.vestifinance.ru/articles/25753',
467             'info_dict': {
468                 'id': '25753',
469                 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
470             },
471             'playlist': [{
472                 'info_dict': {
473                     'id': '370908',
474                     'title': 'Госзаказ. День 3',
475                     'ext': 'mp4',
476                 }
477             }, {
478                 'info_dict': {
479                     'id': '370905',
480                     'title': 'Госзаказ. День 2',
481                     'ext': 'mp4',
482                 }
483             }, {
484                 'info_dict': {
485                     'id': '370902',
486                     'title': 'Госзаказ. День 1',
487                     'ext': 'mp4',
488                 }
489             }],
490             'params': {
491                 # m3u8 download
492                 'skip_download': True,
493             },
494         },
495         # Myvi.ru embed
496         {
497             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
498             'info_dict': {
499                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
500                 'ext': 'mp4',
501                 'title': 'Ужастики, русский трейлер (2015)',
502                 'thumbnail': 're:^https?://.*\.jpg$',
503                 'duration': 153,
504             }
505         },
506         # XHamster embed
507         {
508             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
509             'info_dict': {
510                 'id': 'showthread',
511                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
512             },
513             'playlist_mincount': 7,
514         },
515         # Embedded TED video
516         {
517             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
518             'md5': '65fdff94098e4a607385a60c5177c638',
519             'info_dict': {
520                 'id': '1969',
521                 'ext': 'mp4',
522                 'title': 'Hidden miracles of the natural world',
523                 'uploader': 'Louie Schwartzberg',
524                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
525             }
526         },
527         # Embedded Ustream video
528         {
529             'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
530             'md5': '27b99cdb639c9b12a79bca876a073417',
531             'info_dict': {
532                 'id': '45734260',
533                 'ext': 'flv',
534                 'uploader': 'AU SPA:  The NSA and Privacy',
535                 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
536             }
537         },
538         # nowvideo embed hidden behind percent encoding
539         {
540             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
541             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
542             'info_dict': {
543                 'id': '06e53103ca9aa',
544                 'ext': 'flv',
545                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
546                 'description': 'No description',
547             },
548         },
549         # arte embed
550         {
551             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
552             'md5': '7653032cbb25bf6c80d80f217055fa43',
553             'info_dict': {
554                 'id': '048195-004_PLUS7-F',
555                 'ext': 'flv',
556                 'title': 'X:enius',
557                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
558                 'upload_date': '20140320',
559             },
560             'params': {
561                 'skip_download': 'Requires rtmpdump'
562             }
563         },
564         # francetv embed
565         {
566             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
567             'info_dict': {
568                 'id': 'EV_30231',
569                 'ext': 'mp4',
570                 'title': 'Alcaline, le concert avec Calogero',
571                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
572                 'upload_date': '20150226',
573                 'timestamp': 1424989860,
574                 'duration': 5400,
575             },
576             'params': {
577                 # m3u8 downloads
578                 'skip_download': True,
579             },
580             'expected_warnings': [
581                 'Forbidden'
582             ]
583         },
584         # Condé Nast embed
585         {
586             'url': 'http://www.wired.com/2014/04/honda-asimo/',
587             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
588             'info_dict': {
589                 'id': '53501be369702d3275860000',
590                 'ext': 'mp4',
591                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
592             }
593         },
594         # Dailymotion embed
595         {
596             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
597             'md5': '441aeeb82eb72c422c7f14ec533999cd',
598             'info_dict': {
599                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
600                 'ext': 'mp4',
601                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
602                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
603                 'uploader': 'Spi0n',
604                 'uploader_id': 'xgditw',
605                 'upload_date': '20140425',
606                 'timestamp': 1398441542,
607             },
608             'add_ie': ['Dailymotion'],
609         },
610         # YouTube embed
611         {
612             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
613             'info_dict': {
614                 'id': 'FXRb4ykk4S0',
615                 'ext': 'mp4',
616                 'title': 'The NBL Auction 2014',
617                 'uploader': 'BADMINTON England',
618                 'uploader_id': 'BADMINTONEvents',
619                 'upload_date': '20140603',
620                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
621             },
622             'add_ie': ['Youtube'],
623             'params': {
624                 'skip_download': True,
625             }
626         },
627         # MTVServices embed
628         {
629             'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
630             'md5': '35727f82f58c76d996fc188f9755b0d5',
631             'info_dict': {
632                 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
633                 'ext': 'mp4',
634                 'title': 'Review',
635                 'description': 'Mario\'s life in the fast lane has never looked so good.',
636             },
637         },
638         # YouTube embed via <data-embed-url="">
639         {
640             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
641             'info_dict': {
642                 'id': '4vAffPZIT44',
643                 'ext': 'mp4',
644                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
645                 'uploader': 'Gameloft',
646                 'uploader_id': 'gameloft',
647                 'upload_date': '20140828',
648                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
649             },
650             'params': {
651                 'skip_download': True,
652             }
653         },
654         # Camtasia studio
655         {
656             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
657             'playlist': [{
658                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
659                 'info_dict': {
660                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
661                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
662                     'ext': 'flv',
663                     'duration': 2235.90,
664                 }
665             }, {
666                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
667                 'info_dict': {
668                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
669                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
670                     'ext': 'flv',
671                     'duration': 2235.93,
672                 }
673             }],
674             'info_dict': {
675                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
676             }
677         },
678         # Flowplayer
679         {
680             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
681             'md5': '9d65602bf31c6e20014319c7d07fba27',
682             'info_dict': {
683                 'id': '5123ea6d5e5a7',
684                 'ext': 'mp4',
685                 'age_limit': 18,
686                 'uploader': 'www.handjobhub.com',
687                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
688             }
689         },
690         # Multiple brightcove videos
691         # https://github.com/rg3/youtube-dl/issues/2283
692         {
693             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
694             'info_dict': {
695                 'id': 'always-never',
696                 'title': 'Always / Never - The New Yorker',
697             },
698             'playlist_count': 3,
699             'params': {
700                 'extract_flat': False,
701                 'skip_download': True,
702             }
703         },
704         # MLB embed
705         {
706             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
707             'md5': '96f09a37e44da40dd083e12d9a683327',
708             'info_dict': {
709                 'id': '33322633',
710                 'ext': 'mp4',
711                 'title': 'Ump changes call to ball',
712                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
713                 'duration': 48,
714                 'timestamp': 1401537900,
715                 'upload_date': '20140531',
716                 'thumbnail': 're:^https?://.*\.jpg$',
717             },
718         },
719         # Wistia embed
720         {
721             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
722             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
723             'info_dict': {
724                 'id': '6e2wtrbdaf',
725                 'ext': 'mov',
726                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
727                 'description': 'a Paywall Videos video from Remilon',
728                 'duration': 644.072,
729                 'uploader': 'study.com',
730                 'timestamp': 1459678540,
731                 'upload_date': '20160403',
732                 'filesize': 24687186,
733             },
734         },
735         {
736             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
737             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
738             'info_dict': {
739                 'id': 'uxjb0lwrcz',
740                 'ext': 'mp4',
741                 'title': 'Conversation about Hexagonal Rails Part 1',
742                 'description': 'a Martin Fowler video from ThoughtWorks',
743                 'duration': 1715.0,
744                 'uploader': 'thoughtworks.wistia.com',
745                 'timestamp': 1401832161,
746                 'upload_date': '20140603',
747             },
748         },
749         # Wistia standard embed (async)
750         {
751             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
752             'info_dict': {
753                 'id': '807fafadvk',
754                 'ext': 'mp4',
755                 'title': 'Drip Brennan Dunn Workshop',
756                 'description': 'a JV Webinars video from getdrip-1',
757                 'duration': 4986.95,
758                 'timestamp': 1463607249,
759                 'upload_date': '20160518',
760             },
761             'params': {
762                 'skip_download': True,
763             }
764         },
765         # Soundcloud embed
766         {
767             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
768             'info_dict': {
769                 'id': '174391317',
770                 'ext': 'mp3',
771                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
772                 'uploader': 'Sophos Security',
773                 'title': 'Chet Chat 171 - Oct 29, 2014',
774                 'upload_date': '20141029',
775             }
776         },
777         # Livestream embed
778         {
779             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
780             'info_dict': {
781                 'id': '67864563',
782                 'ext': 'flv',
783                 'upload_date': '20141112',
784                 'title': 'Rosetta #CometLanding webcast HL 10',
785             }
786         },
787         # Another Livestream embed, without 'new.' in URL
788         {
789             'url': 'https://www.freespeech.org/',
790             'info_dict': {
791                 'id': '123537347',
792                 'ext': 'mp4',
793                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
794             },
795             'params': {
796                 # Live stream
797                 'skip_download': True,
798             },
799         },
800         # LazyYT
801         {
802             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
803             'info_dict': {
804                 'id': '1986',
805                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
806             },
807             'playlist_mincount': 2,
808         },
809         # Cinchcast embed
810         {
811             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
812             'info_dict': {
813                 'id': '7141703',
814                 'ext': 'mp3',
815                 'upload_date': '20141126',
816                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
817             }
818         },
819         # Cinerama player
820         {
821             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
822             'info_dict': {
823                 'id': '730m_DandD_1901_512k',
824                 'ext': 'mp4',
825                 'uploader': 'www.abc.net.au',
826                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
827             }
828         },
829         # embedded viddler video
830         {
831             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
832             'info_dict': {
833                 'id': '4d03aad9',
834                 'ext': 'mp4',
835                 'uploader': 'deadspin',
836                 'title': 'WALL-TO-GORTAT',
837                 'timestamp': 1422285291,
838                 'upload_date': '20150126',
839             },
840             'add_ie': ['Viddler'],
841         },
842         # Libsyn embed
843         {
844             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
845             'info_dict': {
846                 'id': '3377616',
847                 'ext': 'mp3',
848                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
849                 'description': 'md5:601cb790edd05908957dae8aaa866465',
850                 'upload_date': '20150220',
851             },
852         },
853         # jwplayer YouTube
854         {
855             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
856             'info_dict': {
857                 'id': 'Mrj4DVp2zeA',
858                 'ext': 'mp4',
859                 'upload_date': '20150212',
860                 'uploader': 'The National Archives UK',
861                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
862                 'uploader_id': 'NationalArchives08',
863                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
864             },
865         },
866         # rtl.nl embed
867         {
868             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
869             'playlist_mincount': 5,
870             'info_dict': {
871                 'id': 'aanslagen-kopenhagen',
872                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
873             }
874         },
875         # Zapiks embed
876         {
877             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
878             'info_dict': {
879                 'id': '118046',
880                 'ext': 'mp4',
881                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
882             }
883         },
884         # Kaltura embed (different embed code)
885         {
886             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
887             'info_dict': {
888                 'id': '1_a52wc67y',
889                 'ext': 'flv',
890                 'upload_date': '20150127',
891                 'uploader_id': 'PremierMedia',
892                 'timestamp': int,
893                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
894             },
895         },
896         # Kaltura embed protected with referrer
897         {
898             'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
899             'info_dict': {
900                 'id': '1_g4fbemnq',
901                 'ext': 'mp4',
902                 'title': 'Violetta - Achter De Schermen - Ruggero',
903                 'description': 'Achter de schermen met Ruggero',
904                 'timestamp': 1435133761,
905                 'upload_date': '20150624',
906                 'uploader_id': 'echojecka',
907             },
908         },
909         # Kaltura embed with single quotes
910         {
911             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
912             'info_dict': {
913                 'id': '0_izeg5utt',
914                 'ext': 'mp4',
915                 'title': '35871',
916                 'timestamp': 1355743100,
917                 'upload_date': '20121217',
918                 'uploader_id': 'batchUser',
919             },
920             'add_ie': ['Kaltura'],
921         },
922         # Eagle.Platform embed (generic URL)
923         {
924             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
925             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
926             'info_dict': {
927                 'id': '227304',
928                 'ext': 'mp4',
929                 'title': 'Навальный вышел на свободу',
930                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
931                 'thumbnail': 're:^https?://.*\.jpg$',
932                 'duration': 87,
933                 'view_count': int,
934                 'age_limit': 0,
935             },
936         },
937         # ClipYou (Eagle.Platform) embed (custom URL)
938         {
939             'url': 'http://muz-tv.ru/play/7129/',
940             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
941             'info_dict': {
942                 'id': '12820',
943                 'ext': 'mp4',
944                 'title': "'O Sole Mio",
945                 'thumbnail': 're:^https?://.*\.jpg$',
946                 'duration': 216,
947                 'view_count': int,
948             },
949         },
950         # Pladform embed
951         {
952             'url': 'http://muz-tv.ru/kinozal/view/7400/',
953             'info_dict': {
954                 'id': '100183293',
955                 'ext': 'mp4',
956                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
957                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
958                 'thumbnail': 're:^https?://.*\.jpg$',
959                 'duration': 694,
960                 'age_limit': 0,
961             },
962         },
963         # Playwire embed
964         {
965             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
966             'info_dict': {
967                 'id': '3519514',
968                 'ext': 'mp4',
969                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
970                 'thumbnail': 're:^https?://.*\.png$',
971                 'duration': 45.115,
972             },
973         },
974         # 5min embed
975         {
976             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
977             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
978             'info_dict': {
979                 'id': '518726732',
980                 'ext': 'mp4',
981                 'title': 'Facebook Creates "On This Day" | Crunch Report',
982             },
983         },
984         # SVT embed
985         {
986             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
987             'info_dict': {
988                 'id': '2900353',
989                 'ext': 'flv',
990                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
991                 'duration': 27,
992                 'age_limit': 0,
993             },
994         },
995         # Crooks and Liars embed
996         {
997             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
998             'info_dict': {
999                 'id': '8RUoRhRi',
1000                 'ext': 'mp4',
1001                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1002                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1003                 'timestamp': 1428207000,
1004                 'upload_date': '20150405',
1005                 'uploader': 'Heather',
1006             },
1007         },
1008         # Crooks and Liars external embed
1009         {
1010             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1011             'info_dict': {
1012                 'id': 'MTE3MjUtMzQ2MzA',
1013                 'ext': 'mp4',
1014                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1015                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1016                 'timestamp': 1265032391,
1017                 'upload_date': '20100201',
1018                 'uploader': 'Heather',
1019             },
1020         },
1021         # NBC Sports vplayer embed
1022         {
1023             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1024             'info_dict': {
1025                 'id': 'ln7x1qSThw4k',
1026                 'ext': 'flv',
1027                 'title': "PFT Live: New leader in the 'new-look' defense",
1028                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1029                 'uploader': 'NBCU-SPORTS',
1030                 'upload_date': '20140107',
1031                 'timestamp': 1389118457,
1032             },
1033         },
1034         # UDN embed
1035         {
1036             'url': 'https://video.udn.com/news/300346',
1037             'md5': 'fd2060e988c326991037b9aff9df21a6',
1038             'info_dict': {
1039                 'id': '300346',
1040                 'ext': 'mp4',
1041                 'title': '中一中男師變性 全校師生力挺',
1042                 'thumbnail': 're:^https?://.*\.jpg$',
1043             },
1044             'params': {
1045                 # m3u8 download
1046                 'skip_download': True,
1047             },
1048         },
1049         # Ooyala embed
1050         {
1051             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1052             'info_dict': {
1053                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1054                 'ext': 'mp4',
1055                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1056                 'title': 'This is what separates the Excel masters from the wannabes',
1057                 'duration': 191.933,
1058             },
1059             'params': {
1060                 # m3u8 downloads
1061                 'skip_download': True,
1062             }
1063         },
1064         # Contains a SMIL manifest
1065         {
1066             'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
1067             'info_dict': {
1068                 'id': 'file',
1069                 'ext': 'flv',
1070                 'title': '+ Football: Lottery Champions League Europe',
1071                 'uploader': 'www.telewebion.com',
1072             },
1073             'params': {
1074                 # rtmpe downloads
1075                 'skip_download': True,
1076             }
1077         },
1078         # Brightcove URL in single quotes
1079         {
1080             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1081             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1082             'info_dict': {
1083                 'id': '4255764656001',
1084                 'ext': 'mp4',
1085                 'title': 'SN Presents: Russell Martin, World Citizen',
1086                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1087                 'uploader': 'Rogers Sportsnet',
1088                 'uploader_id': '1704050871',
1089                 'upload_date': '20150525',
1090                 'timestamp': 1432570283,
1091             },
1092         },
1093         # Dailymotion Cloud video
1094         {
1095             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1096             'md5': '49444254273501a64675a7e68c502681',
1097             'info_dict': {
1098                 'id': '5585de919473990de4bee11b',
1099                 'ext': 'mp4',
1100                 'title': 'Le débat',
1101                 'thumbnail': 're:^https?://.*\.jpe?g$',
1102             }
1103         },
1104         # OnionStudios embed
1105         {
1106             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1107             'info_dict': {
1108                 'id': '2855',
1109                 'ext': 'mp4',
1110                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1111                 'thumbnail': 're:^https?://.*\.jpe?g$',
1112                 'uploader': 'ClickHole',
1113                 'uploader_id': 'clickhole',
1114             }
1115         },
1116         # SnagFilms embed
1117         {
1118             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1119             'info_dict': {
1120                 'id': '74849a00-85a9-11e1-9660-123139220831',
1121                 'ext': 'mp4',
1122                 'title': '#whilewewatch',
1123             }
1124         },
1125         # AdobeTVVideo embed
1126         {
1127             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1128             'md5': '43662b577c018ad707a63766462b1e87',
1129             'info_dict': {
1130                 'id': '2456',
1131                 'ext': 'mp4',
1132                 'title': 'New experience with Acrobat DC',
1133                 'description': 'New experience with Acrobat DC',
1134                 'duration': 248.667,
1135             },
1136         },
1137         # ScreenwaveMedia embed
1138         {
1139             'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
1140             'md5': '24ace5baba0d35d55c6810b51f34e9e0',
1141             'info_dict': {
1142                 'id': 'cinemasnob-55d26273809dd',
1143                 'ext': 'mp4',
1144                 'title': 'cinemasnob',
1145             },
1146         },
1147         # BrightcoveInPageEmbed embed
1148         {
1149             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1150             'info_dict': {
1151                 'id': '4238694884001',
1152                 'ext': 'flv',
1153                 'title': 'Tabletop: Dread, Last Thoughts',
1154                 'description': 'Tabletop: Dread, Last Thoughts',
1155                 'duration': 51690,
1156             },
1157         },
1158         # JWPlayer with M3U8
1159         {
1160             'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
1161             'info_dict': {
1162                 'id': 'playlist',
1163                 'ext': 'mp4',
1164                 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
1165                 'uploader': 'ren.tv',
1166             },
1167             'params': {
1168                 # m3u8 downloads
1169                 'skip_download': True,
1170             }
1171         },
1172         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1173         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1174         {
1175             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1176             'info_dict': {
1177                 'id': '4785848093001',
1178                 'ext': 'mp4',
1179                 'title': 'The Cardinal Pell Interview',
1180                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1181                 'uploader': 'GlobeCast Australia - GlobeStream',
1182                 'uploader_id': '2733773828001',
1183                 'upload_date': '20160304',
1184                 'timestamp': 1457083087,
1185             },
1186             'params': {
1187                 # m3u8 downloads
1188                 'skip_download': True,
1189             },
1190         },
1191         # Another form of arte.tv embed
1192         {
1193             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1194             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1195             'info_dict': {
1196                 'id': '030273-562_PLUS7-F',
1197                 'ext': 'mp4',
1198                 'title': 'ARTE Reportage - Nulle part, en France',
1199                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1200                 'upload_date': '20160409',
1201             },
1202         },
1203         # LiveLeak embed
1204         {
1205             'url': 'http://www.wykop.pl/link/3088787/',
1206             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1207             'info_dict': {
1208                 'id': '874_1459135191',
1209                 'ext': 'mp4',
1210                 'title': 'Man shows poor quality of new apartment building',
1211                 'description': 'The wall is like a sand pile.',
1212                 'uploader': 'Lake8737',
1213             }
1214         },
1215         # Duplicated embedded video URLs
1216         {
1217             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1218             'info_dict': {
1219                 'id': '149298443_480_16c25b74_2',
1220                 'ext': 'mp4',
1221                 'title': 'vs. Blue Orange Spring Game',
1222                 'uploader': 'www.hudl.com',
1223             },
1224         },
1225     ]
1226
1227     def report_following_redirect(self, new_url):
1228         """Report information extraction."""
1229         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1230
1231     def _extract_rss(self, url, video_id, doc):
1232         playlist_title = doc.find('./channel/title').text
1233         playlist_desc_el = doc.find('./channel/description')
1234         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1235
1236         entries = []
1237         for it in doc.findall('./channel/item'):
1238             next_url = xpath_text(it, 'link', fatal=False)
1239             if not next_url:
1240                 enclosure_nodes = it.findall('./enclosure')
1241                 for e in enclosure_nodes:
1242                     next_url = e.attrib.get('url')
1243                     if next_url:
1244                         break
1245
1246             if not next_url:
1247                 continue
1248
1249             entries.append({
1250                 '_type': 'url',
1251                 'url': next_url,
1252                 'title': it.find('title').text,
1253             })
1254
1255         return {
1256             '_type': 'playlist',
1257             'id': url,
1258             'title': playlist_title,
1259             'description': playlist_desc,
1260             'entries': entries,
1261         }
1262
1263     def _extract_camtasia(self, url, video_id, webpage):
1264         """ Returns None if no camtasia video can be found. """
1265
1266         camtasia_cfg = self._search_regex(
1267             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1268             webpage, 'camtasia configuration file', default=None)
1269         if camtasia_cfg is None:
1270             return None
1271
1272         title = self._html_search_meta('DC.title', webpage, fatal=True)
1273
1274         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1275         camtasia_cfg = self._download_xml(
1276             camtasia_url, video_id,
1277             note='Downloading camtasia configuration',
1278             errnote='Failed to download camtasia configuration')
1279         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1280
1281         entries = []
1282         for n in fileset_node.getchildren():
1283             url_n = n.find('./uri')
1284             if url_n is None:
1285                 continue
1286
1287             entries.append({
1288                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1289                 'title': '%s - %s' % (title, n.tag),
1290                 'url': compat_urlparse.urljoin(url, url_n.text),
1291                 'duration': float_or_none(n.find('./duration').text),
1292             })
1293
1294         return {
1295             '_type': 'playlist',
1296             'entries': entries,
1297             'title': title,
1298         }
1299
1300     def _real_extract(self, url):
1301         if url.startswith('//'):
1302             return {
1303                 '_type': 'url',
1304                 'url': self.http_scheme() + url,
1305             }
1306
1307         parsed_url = compat_urlparse.urlparse(url)
1308         if not parsed_url.scheme:
1309             default_search = self._downloader.params.get('default_search')
1310             if default_search is None:
1311                 default_search = 'fixup_error'
1312
1313             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1314                 if '/' in url:
1315                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1316                     return self.url_result('http://' + url)
1317                 elif default_search != 'fixup_error':
1318                     if default_search == 'auto_warning':
1319                         if re.match(r'^(?:url|URL)$', url):
1320                             raise ExtractorError(
1321                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1322                                 expected=True)
1323                         else:
1324                             self._downloader.report_warning(
1325                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1326                     return self.url_result('ytsearch:' + url)
1327
1328             if default_search in ('error', 'fixup_error'):
1329                 raise ExtractorError(
1330                     '%r is not a valid URL. '
1331                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1332                     % (url, url), expected=True)
1333             else:
1334                 if ':' not in default_search:
1335                     default_search += ':'
1336                 return self.url_result(default_search + url)
1337
1338         url, smuggled_data = unsmuggle_url(url)
1339         force_videoid = None
1340         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1341         if smuggled_data and 'force_videoid' in smuggled_data:
1342             force_videoid = smuggled_data['force_videoid']
1343             video_id = force_videoid
1344         else:
1345             video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
1346
1347         self.to_screen('%s: Requesting header' % video_id)
1348
1349         head_req = HEADRequest(url)
1350         head_response = self._request_webpage(
1351             head_req, video_id,
1352             note=False, errnote='Could not send HEAD request to %s' % url,
1353             fatal=False)
1354
1355         if head_response is not False:
1356             # Check for redirect
1357             new_url = head_response.geturl()
1358             if url != new_url:
1359                 self.report_following_redirect(new_url)
1360                 if force_videoid:
1361                     new_url = smuggle_url(
1362                         new_url, {'force_videoid': force_videoid})
1363                 return self.url_result(new_url)
1364
1365         full_response = None
1366         if head_response is False:
1367             request = sanitized_Request(url)
1368             request.add_header('Accept-Encoding', '*')
1369             full_response = self._request_webpage(request, video_id)
1370             head_response = full_response
1371
1372         info_dict = {
1373             'id': video_id,
1374             'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
1375             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1376         }
1377
1378         # Check for direct link to a video
1379         content_type = head_response.headers.get('Content-Type', '').lower()
1380         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1381         if m:
1382             format_id = m.group('format_id')
1383             if format_id.endswith('mpegurl'):
1384                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1385             elif format_id == 'f4m':
1386                 formats = self._extract_f4m_formats(url, video_id)
1387             else:
1388                 formats = [{
1389                     'format_id': m.group('format_id'),
1390                     'url': url,
1391                     'vcodec': 'none' if m.group('type') == 'audio' else None
1392                 }]
1393                 info_dict['direct'] = True
1394             self._sort_formats(formats)
1395             info_dict['formats'] = formats
1396             return info_dict
1397
1398         if not self._downloader.params.get('test', False) and not is_intentional:
1399             force = self._downloader.params.get('force_generic_extractor', False)
1400             self._downloader.report_warning(
1401                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1402
1403         if not full_response:
1404             request = sanitized_Request(url)
1405             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1406             # making it impossible to download only chunk of the file (yet we need only 512kB to
1407             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1408             # that will always result in downloading the whole file that is not desirable.
1409             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1410             # to accept raw bytes and being able to download only a chunk.
1411             # It may probably better to solve this by checking Content-Type for application/octet-stream
1412             # after HEAD request finishes, but not sure if we can rely on this.
1413             request.add_header('Accept-Encoding', '*')
1414             full_response = self._request_webpage(request, video_id)
1415
1416         first_bytes = full_response.read(512)
1417
1418         # Is it an M3U playlist?
1419         if first_bytes.startswith(b'#EXTM3U'):
1420             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1421             self._sort_formats(info_dict['formats'])
1422             return info_dict
1423
1424         # Maybe it's a direct link to a video?
1425         # Be careful not to download the whole thing!
1426         if not is_html(first_bytes):
1427             self._downloader.report_warning(
1428                 'URL could be a direct video link, returning it as such.')
1429             info_dict.update({
1430                 'direct': True,
1431                 'url': url,
1432             })
1433             return info_dict
1434
1435         webpage = self._webpage_read_content(
1436             full_response, url, video_id, prefix=first_bytes)
1437
1438         self.report_extraction(video_id)
1439
1440         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1441         try:
1442             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1443             if doc.tag == 'rss':
1444                 return self._extract_rss(url, video_id, doc)
1445             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1446                 smil = self._parse_smil(doc, url, video_id)
1447                 self._sort_formats(smil['formats'])
1448                 return smil
1449             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1450                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1451             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1452                 info_dict['formats'] = self._parse_mpd_formats(
1453                     doc, video_id, mpd_base_url=url.rpartition('/')[0])
1454                 self._sort_formats(info_dict['formats'])
1455                 return info_dict
1456             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1457                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1458                 self._sort_formats(info_dict['formats'])
1459                 return info_dict
1460         except compat_xml_parse_error:
1461             pass
1462
1463         # Is it a Camtasia project?
1464         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1465         if camtasia_res is not None:
1466             return camtasia_res
1467
1468         # Sometimes embedded video player is hidden behind percent encoding
1469         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
        # Unescaping the whole page allows us to handle those cases in a generic way
1471         webpage = compat_urllib_parse_unquote(webpage)
1472
1473         # it's tempting to parse this further, but you would
1474         # have to take into account all the variations like
1475         #   Video Title - Site Name
1476         #   Site Name | Video Title
1477         #   Video Title - Tagline | Site Name
1478         # and so on and so forth; it's just not practical
1479         video_title = self._og_search_title(
1480             webpage, default=None) or self._html_search_regex(
1481             r'(?s)<title>(.*?)</title>', webpage, 'video title',
1482             default='video')
1483
1484         # Try to detect age limit automatically
1485         age_limit = self._rta_search(webpage)
1486         # And then there are the jokers who advertise that they use RTA,
1487         # but actually don't.
1488         AGE_LIMIT_MARKERS = [
1489             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1490         ]
1491         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
1492             age_limit = 18
1493
1494         # video uploader is domain name
1495         video_uploader = self._search_regex(
1496             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
1497
1498         video_description = self._og_search_description(webpage, default=None)
1499         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
1500
1501         # Helper method
1502         def _playlist_from_matches(matches, getter=None, ie=None):
1503             urlrs = orderedSet(
1504                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
1505                 for m in matches)
1506             return self.playlist_result(
1507                 urlrs, playlist_id=video_id, playlist_title=video_title)
1508
1509         # Look for Brightcove Legacy Studio embeds
1510         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
1511         if bc_urls:
1512             self.to_screen('Brightcove video detected.')
1513             entries = [{
1514                 '_type': 'url',
1515                 'url': smuggle_url(bc_url, {'Referer': url}),
1516                 'ie_key': 'BrightcoveLegacy'
1517             } for bc_url in bc_urls]
1518
1519             return {
1520                 '_type': 'playlist',
1521                 'title': video_title,
1522                 'id': video_id,
1523                 'entries': entries,
1524             }
1525
1526         # Look for Brightcove New Studio embeds
1527         bc_urls = BrightcoveNewIE._extract_urls(webpage)
1528         if bc_urls:
1529             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
1530
1531         # Look for ThePlatform embeds
1532         tp_urls = ThePlatformIE._extract_urls(webpage)
1533         if tp_urls:
1534             return _playlist_from_matches(tp_urls, ie='ThePlatform')
1535
1536         # Look for embedded rtl.nl player
1537         matches = re.findall(
1538             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
1539             webpage)
1540         if matches:
1541             return _playlist_from_matches(matches, ie='RtlNl')
1542
1543         vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
1544         if vimeo_url is not None:
1545             return self.url_result(vimeo_url)
1546
1547         vid_me_embed_url = self._search_regex(
1548             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
1549             webpage, 'vid.me embed', default=None)
1550         if vid_me_embed_url is not None:
1551             return self.url_result(vid_me_embed_url, 'Vidme')
1552
1553         # Look for embedded YouTube player
1554         matches = re.findall(r'''(?x)
1555             (?:
1556                 <iframe[^>]+?src=|
1557                 data-video-url=|
1558                 <embed[^>]+?src=|
1559                 embedSWF\(?:\s*|
1560                 new\s+SWFObject\(
1561             )
1562             (["\'])
1563                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1564                 (?:embed|v|p)/.+?)
1565             \1''', webpage)
1566         if matches:
1567             return _playlist_from_matches(
1568                 matches, lambda m: unescapeHTML(m[1]))
1569
1570         # Look for lazyYT YouTube embed
1571         matches = re.findall(
1572             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
1573         if matches:
1574             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1575
1576         # Look for embedded Dailymotion player
1577         matches = re.findall(
1578             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
1579         if matches:
1580             return _playlist_from_matches(
1581                 matches, lambda m: unescapeHTML(m[1]))
1582
1583         # Look for embedded Dailymotion playlist player (#3822)
1584         m = re.search(
1585             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1586         if m:
1587             playlists = re.findall(
1588                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1589             if playlists:
1590                 return _playlist_from_matches(
1591                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1592
1593         # Look for embedded Wistia player
1594         match = re.search(
1595             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
1596         if match:
1597             embed_url = self._proto_relative_url(
1598                 unescapeHTML(match.group('url')))
1599             return {
1600                 '_type': 'url_transparent',
1601                 'url': embed_url,
1602                 'ie_key': 'Wistia',
1603                 'uploader': video_uploader,
1604             }
1605
1606         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
1607         if match:
1608             return {
1609                 '_type': 'url_transparent',
1610                 'url': 'wistia:%s' % match.group('id'),
1611                 'ie_key': 'Wistia',
1612                 'uploader': video_uploader,
1613             }
1614
1615         match = re.search(
1616             r'''(?sx)
1617                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
1618                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
1619             ''', webpage)
1620         if match:
1621             return self.url_result(self._proto_relative_url(
1622                 'wistia:%s' % match.group('id')), 'Wistia')
1623
1624         # Look for SVT player
1625         svt_url = SVTIE._extract_url(webpage)
1626         if svt_url:
1627             return self.url_result(svt_url, 'SVT')
1628
1629         # Look for embedded condenast player
1630         matches = re.findall(
1631             r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
1632             webpage)
1633         if matches:
1634             return {
1635                 '_type': 'playlist',
1636                 'entries': [{
1637                     '_type': 'url',
1638                     'ie_key': 'CondeNast',
1639                     'url': ma,
1640                 } for ma in matches],
1641                 'title': video_title,
1642                 'id': video_id,
1643             }
1644
1645         # Look for Bandcamp pages with custom domain
1646         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
1647         if mobj is not None:
1648             burl = unescapeHTML(mobj.group(1))
1649             # Don't set the extractor because it can be a track url or an album
1650             return self.url_result(burl)
1651
1652         # Look for embedded Vevo player
1653         mobj = re.search(
1654             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
1655         if mobj is not None:
1656             return self.url_result(mobj.group('url'))
1657
1658         # Look for embedded Viddler player
1659         mobj = re.search(
1660             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1661             webpage)
1662         if mobj is not None:
1663             return self.url_result(mobj.group('url'))
1664
1665         # Look for NYTimes player
1666         mobj = re.search(
1667             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
1668             webpage)
1669         if mobj is not None:
1670             return self.url_result(mobj.group('url'))
1671
1672         # Look for Libsyn player
1673         mobj = re.search(
1674             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1675         if mobj is not None:
1676             return self.url_result(mobj.group('url'))
1677
1678         # Look for Ooyala videos
1679         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1680                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
1681                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
1682                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
1683         if mobj is not None:
1684             return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
1685
1686         # Look for multiple Ooyala embeds on SBN network websites
1687         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
1688         if mobj is not None:
1689             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1690             if embeds:
1691                 return _playlist_from_matches(
1692                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
1693
1694         # Look for Aparat videos
1695         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1696         if mobj is not None:
1697             return self.url_result(mobj.group(1), 'Aparat')
1698
1699         # Look for MPORA videos
1700         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
1701         if mobj is not None:
1702             return self.url_result(mobj.group(1), 'Mpora')
1703
1704         # Look for embedded NovaMov-based player
1705         mobj = re.search(
1706             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1707                     (?P<url>http://(?:(?:embed|www)\.)?
1708                         (?:novamov\.com|
1709                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
1710                            videoweed\.(?:es|com)|
1711                            movshare\.(?:net|sx|ag)|
1712                            divxstage\.(?:eu|net|ch|co|at|ag))
1713                         /embed\.php.+?)\1''', webpage)
1714         if mobj is not None:
1715             return self.url_result(mobj.group('url'))
1716
1717         # Look for embedded Facebook player
1718         mobj = re.search(
1719             r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1720         if mobj is not None:
1721             return self.url_result(mobj.group('url'), 'Facebook')
1722
1723         # Look for embedded VK player
1724         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
1725         if mobj is not None:
1726             return self.url_result(mobj.group('url'), 'VK')
1727
1728         # Look for embedded Odnoklassniki player
1729         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
1730         if mobj is not None:
1731             return self.url_result(mobj.group('url'), 'Odnoklassniki')
1732
1733         # Look for embedded ivi player
1734         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1735         if mobj is not None:
1736             return self.url_result(mobj.group('url'), 'Ivi')
1737
1738         # Look for embedded Huffington Post player
1739         mobj = re.search(
1740             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
1741         if mobj is not None:
1742             return self.url_result(mobj.group('url'), 'HuffPost')
1743
1744         # Look for embed.ly
1745         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
1746         if mobj is not None:
1747             return self.url_result(mobj.group('url'))
1748         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
1749         if mobj is not None:
1750             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
1751
1752         # Look for funnyordie embed
1753         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1754         if matches:
1755             return _playlist_from_matches(
1756                 matches, getter=unescapeHTML, ie='FunnyOrDie')
1757
1758         # Look for BBC iPlayer embed
1759         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
1760         if matches:
1761             return _playlist_from_matches(matches, ie='BBCCoUk')
1762
1763         # Look for embedded RUTV player
1764         rutv_url = RUTVIE._extract_url(webpage)
1765         if rutv_url:
1766             return self.url_result(rutv_url, 'RUTV')
1767
1768         # Look for embedded TVC player
1769         tvc_url = TVCIE._extract_url(webpage)
1770         if tvc_url:
1771             return self.url_result(tvc_url, 'TVC')
1772
1773         # Look for embedded SportBox player
1774         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1775         if sportbox_urls:
1776             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1777
1778         # Look for embedded PornHub player
1779         pornhub_url = PornHubIE._extract_url(webpage)
1780         if pornhub_url:
1781             return self.url_result(pornhub_url, 'PornHub')
1782
1783         # Look for embedded XHamster player
1784         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
1785         if xhamster_urls:
1786             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
1787
1788         # Look for embedded TNAFlixNetwork player
1789         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
1790         if tnaflix_urls:
1791             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
1792
1793         # Look for embedded Tvigle player
1794         mobj = re.search(
1795             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
1796         if mobj is not None:
1797             return self.url_result(mobj.group('url'), 'Tvigle')
1798
1799         # Look for embedded TED player
1800         mobj = re.search(
1801             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
1802         if mobj is not None:
1803             return self.url_result(mobj.group('url'), 'TED')
1804
1805         # Look for embedded Ustream videos
1806         mobj = re.search(
1807             r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1808         if mobj is not None:
1809             return self.url_result(mobj.group('url'), 'Ustream')
1810
1811         # Look for embedded arte.tv player
1812         mobj = re.search(
1813             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
1814             webpage)
1815         if mobj is not None:
1816             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
1817
1818         # Look for embedded francetv player
1819         mobj = re.search(
1820             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
1821             webpage)
1822         if mobj is not None:
1823             return self.url_result(mobj.group('url'))
1824
1825         # Look for embedded smotri.com player
1826         smotri_url = SmotriIE._extract_url(webpage)
1827         if smotri_url:
1828             return self.url_result(smotri_url, 'Smotri')
1829
1830         # Look for embedded Myvi.ru player
1831         myvi_url = MyviIE._extract_url(webpage)
1832         if myvi_url:
1833             return self.url_result(myvi_url)
1834
1835         # Look for embedded soundcloud player
1836         mobj = re.search(
1837             r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
1838             webpage)
1839         if mobj is not None:
1840             url = unescapeHTML(mobj.group('url'))
1841             return self.url_result(url)
1842
1843         # Look for embedded vulture.com player
1844         mobj = re.search(
1845             r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1846             webpage)
1847         if mobj is not None:
1848             url = unescapeHTML(mobj.group('url'))
1849             return self.url_result(url, ie='Vulture')
1850
1851         # Look for embedded mtvservices player
1852         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
1853         if mtvservices_url:
1854             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
1855
1856         # Look for embedded yahoo player
1857         mobj = re.search(
1858             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1859             webpage)
1860         if mobj is not None:
1861             return self.url_result(mobj.group('url'), 'Yahoo')
1862
1863         # Look for embedded sbs.com.au player
1864         mobj = re.search(
1865             r'''(?x)
1866             (?:
1867                 <meta\s+property="og:video"\s+content=|
1868                 <iframe[^>]+?src=
1869             )
1870             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
1871             webpage)
1872         if mobj is not None:
1873             return self.url_result(mobj.group('url'), 'SBS')
1874
1875         # Look for embedded Cinchcast player
1876         mobj = re.search(
1877             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1878             webpage)
1879         if mobj is not None:
1880             return self.url_result(mobj.group('url'), 'Cinchcast')
1881
1882         mobj = re.search(
1883             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1884             webpage)
1885         if not mobj:
1886             mobj = re.search(
1887                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1888                 webpage)
1889         if mobj is not None:
1890             return self.url_result(mobj.group('url'), 'MLB')
1891
1892         mobj = re.search(
1893             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1894             webpage)
1895         if mobj is not None:
1896             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1897
1898         mobj = re.search(
1899             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
1900             webpage)
1901         if mobj is not None:
1902             return self.url_result(mobj.group('url'), 'Livestream')
1903
1904         # Look for Zapiks embed
1905         mobj = re.search(
1906             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1907         if mobj is not None:
1908             return self.url_result(mobj.group('url'), 'Zapiks')
1909
1910         # Look for Kaltura embeds
1911         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
1912                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
1913         if mobj is not None:
1914             return self.url_result(smuggle_url(
1915                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
1916                 {'source_url': url}), 'Kaltura')
1917
1918         # Look for Eagle.Platform embeds
1919         mobj = re.search(
1920             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1921         if mobj is not None:
1922             return self.url_result(mobj.group('url'), 'EaglePlatform')
1923
1924         # Look for ClipYou (uses Eagle.Platform) embeds
1925         mobj = re.search(
1926             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1927         if mobj is not None:
1928             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1929
1930         # Look for Pladform embeds
1931         pladform_url = PladformIE._extract_url(webpage)
1932         if pladform_url:
1933             return self.url_result(pladform_url)
1934
1935         # Look for Videomore embeds
1936         videomore_url = VideomoreIE._extract_url(webpage)
1937         if videomore_url:
1938             return self.url_result(videomore_url)
1939
1940         # Look for Playwire embeds
1941         mobj = re.search(
1942             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1943         if mobj is not None:
1944             return self.url_result(mobj.group('url'))
1945
1946         # Look for 5min embeds
1947         mobj = re.search(
1948             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1949         if mobj is not None:
1950             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1951
1952         # Look for Crooks and Liars embeds
1953         mobj = re.search(
1954             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1955         if mobj is not None:
1956             return self.url_result(mobj.group('url'))
1957
1958         # Look for NBC Sports VPlayer embeds
1959         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1960         if nbc_sports_url:
1961             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1962
1963         # Look for Google Drive embeds
1964         google_drive_url = GoogleDriveIE._extract_url(webpage)
1965         if google_drive_url:
1966             return self.url_result(google_drive_url, 'GoogleDrive')
1967
1968         # Look for UDN embeds
1969         mobj = re.search(
1970             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
1971         if mobj is not None:
1972             return self.url_result(
1973                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1974
1975         # Look for Senate ISVP iframe
1976         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1977         if senate_isvp_url:
1978             return self.url_result(senate_isvp_url, 'SenateISVP')
1979
1980         # Look for Dailymotion Cloud videos
1981         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
1982         if dmcloud_url:
1983             return self.url_result(dmcloud_url, 'DailymotionCloud')
1984
1985         # Look for OnionStudios embeds
1986         onionstudios_url = OnionStudiosIE._extract_url(webpage)
1987         if onionstudios_url:
1988             return self.url_result(onionstudios_url)
1989
1990         # Look for ViewLift embeds
1991         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
1992         if viewlift_url:
1993             return self.url_result(viewlift_url)
1994
1995         # Look for JWPlatform embeds
1996         jwplatform_url = JWPlatformIE._extract_url(webpage)
1997         if jwplatform_url:
1998             return self.url_result(jwplatform_url, 'JWPlatform')
1999
2000         # Look for ScreenwaveMedia embeds
2001         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
2002         if mobj is not None:
2003             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
2004
2005         # Look for Digiteka embeds
2006         digiteka_url = DigitekaIE._extract_url(webpage)
2007         if digiteka_url:
2008             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2009
2010         # Look for Limelight embeds
2011         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2012         if mobj:
2013             lm = {
2014                 'Media': 'media',
2015                 'Channel': 'channel',
2016                 'ChannelList': 'channel_list',
2017             }
2018             return self.url_result('limelight:%s:%s' % (
2019                 lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
2020
2021         # Look for AdobeTVVideo embeds
2022         mobj = re.search(
2023             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2024             webpage)
2025         if mobj is not None:
2026             return self.url_result(
2027                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2028                 'AdobeTVVideo')
2029
2030         # Look for Vine embeds
2031         mobj = re.search(
2032             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2033             webpage)
2034         if mobj is not None:
2035             return self.url_result(
2036                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2037
2038         # Look for Instagram embeds
2039         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2040         if instagram_embed_url is not None:
2041             return self.url_result(
2042                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2043
2044         # Look for LiveLeak embeds
2045         liveleak_url = LiveLeakIE._extract_url(webpage)
2046         if liveleak_url:
2047             return self.url_result(liveleak_url, 'LiveLeak')
2048
2049         # Look for 3Q SDN embeds
2050         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2051         if threeqsdn_url:
2052             return {
2053                 '_type': 'url_transparent',
2054                 'ie_key': ThreeQSDNIE.ie_key(),
2055                 'url': self._proto_relative_url(threeqsdn_url),
2056                 'title': video_title,
2057                 'description': video_description,
2058                 'thumbnail': video_thumbnail,
2059                 'uploader': video_uploader,
2060             }
2061
2062         def check_video(vurl):
2063             if YoutubeIE.suitable(vurl):
2064                 return True
2065             vpath = compat_urlparse.urlparse(vurl).path
2066             vext = determine_ext(vpath)
2067             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
2068
2069         def filter_video(urls):
2070             return list(filter(check_video, urls))
2071
2072         # Start with something easy: JW Player in SWFObject
2073         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2074         if not found:
2075             # Look for gorilla-vid style embedding
2076             found = filter_video(re.findall(r'''(?sx)
2077                 (?:
2078                     jw_plugins|
2079                     JWPlayerOptions|
2080                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2081                 )
2082                 .*?
2083                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2084         if not found:
2085             # Broaden the search a little bit
2086             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2087         if not found:
2088             # Broaden the findall a little bit: JWPlayer JS loader
2089             found = filter_video(re.findall(
2090                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2091         if not found:
2092             # Flow player
2093             found = filter_video(re.findall(r'''(?xs)
2094                 flowplayer\("[^"]+",\s*
2095                     \{[^}]+?\}\s*,
2096                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2097                         ["']?url["']?\s*:\s*["']([^"']+)["']
2098             ''', webpage))
2099         if not found:
2100             # Cinerama player
2101             found = re.findall(
2102                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2103         if not found:
2104             # Try to find twitter cards info
2105             found = filter_video(re.findall(
2106                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2107         if not found:
2108             # We look for Open Graph info:
            # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
2110             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2111             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2112             if m_video_type is not None:
2113                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2114         if not found:
2115             # HTML5 video
2116             found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
2117         if not found:
2118             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2119             found = re.search(
2120                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2121                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2122                 webpage)
2123             if not found:
2124                 # Look also in Refresh HTTP header
2125                 refresh_header = head_response.headers.get('Refresh')
2126                 if refresh_header:
2127                     # In python 2 response HTTP headers are bytestrings
2128                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2129                         refresh_header = refresh_header.decode('iso-8859-1')
2130                     found = re.search(REDIRECT_REGEX, refresh_header)
2131             if found:
2132                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2133                 self.report_following_redirect(new_url)
2134                 return {
2135                     '_type': 'url',
2136                     'url': new_url,
2137                 }
2138         if not found:
2139             raise UnsupportedError(url)
2140
2141         entries = []
2142         for video_url in orderedSet(found):
2143             video_url = unescapeHTML(video_url)
2144             video_url = video_url.replace('\\/', '/')
2145             video_url = compat_urlparse.urljoin(url, video_url)
2146             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2147
2148             # Sometimes, jwplayer extraction will result in a YouTube URL
2149             if YoutubeIE.suitable(video_url):
2150                 entries.append(self.url_result(video_url, 'Youtube'))
2151                 continue
2152
2153             # here's a fun little line of code for you:
2154             video_id = os.path.splitext(video_id)[0]
2155
2156             entry_info_dict = {
2157                 'id': video_id,
2158                 'uploader': video_uploader,
2159                 'title': video_title,
2160                 'age_limit': age_limit,
2161             }
2162
2163             ext = determine_ext(video_url)
2164             if ext == 'smil':
2165                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2166             elif ext == 'xspf':
2167                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2168             elif ext == 'm3u8':
2169                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2170             elif ext == 'mpd':
2171                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2172             elif ext == 'f4m':
2173                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2174             else:
2175                 entry_info_dict['url'] = video_url
2176
2177             if entry_info_dict.get('formats'):
2178                 self._sort_formats(entry_info_dict['formats'])
2179
2180             entries.append(entry_info_dict)
2181
2182         if len(entries) == 1:
2183             return entries[0]
2184         else:
2185             for num, e in enumerate(entries, start=1):
2186                 # 'url' results don't have a title
2187                 if e.get('title') is not None:
2188                     e['title'] = '%s (%d)' % (e['title'], num)
2189             return {
2190                 '_type': 'playlist',
2191                 'entries': entries,
2192             }