af1322e0085befa144605f16c22a52fcca5a3bcf
[youtube-dl] / youtube_dl / extractor / generic.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_str,
14     compat_urllib_parse_unquote,
15     compat_urlparse,
16     compat_xml_parse_error,
17 )
18 from ..utils import (
19     determine_ext,
20     ExtractorError,
21     float_or_none,
22     HEADRequest,
23     is_html,
24     js_to_json,
25     KNOWN_EXTENSIONS,
26     mimetype2ext,
27     orderedSet,
28     sanitized_Request,
29     smuggle_url,
30     unescapeHTML,
31     unified_strdate,
32     unsmuggle_url,
33     UnsupportedError,
34     xpath_text,
35 )
36 from .commonprotocols import RtmpIE
37 from .brightcove import (
38     BrightcoveLegacyIE,
39     BrightcoveNewIE,
40 )
41 from .nexx import (
42     NexxIE,
43     NexxEmbedIE,
44 )
45 from .nbc import NBCSportsVPlayerIE
46 from .ooyala import OoyalaIE
47 from .rutv import RUTVIE
48 from .tvc import TVCIE
49 from .sportbox import SportBoxEmbedIE
50 from .smotri import SmotriIE
51 from .myvi import MyviIE
52 from .condenast import CondeNastIE
53 from .udn import UDNEmbedIE
54 from .senateisvp import SenateISVPIE
55 from .svt import SVTIE
56 from .pornhub import PornHubIE
57 from .xhamster import XHamsterEmbedIE
58 from .tnaflix import TNAFlixNetworkEmbedIE
59 from .drtuber import DrTuberIE
60 from .redtube import RedTubeIE
61 from .tube8 import Tube8IE
62 from .vimeo import VimeoIE
63 from .dailymotion import DailymotionIE
64 from .dailymail import DailyMailIE
65 from .onionstudios import OnionStudiosIE
66 from .viewlift import ViewLiftEmbedIE
67 from .mtv import MTVServicesEmbeddedIE
68 from .pladform import PladformIE
69 from .videomore import VideomoreIE
70 from .webcaster import WebcasterFeedIE
71 from .googledrive import GoogleDriveIE
72 from .jwplatform import JWPlatformIE
73 from .digiteka import DigitekaIE
74 from .arkena import ArkenaIE
75 from .instagram import InstagramIE
76 from .liveleak import LiveLeakIE
77 from .threeqsdn import ThreeQSDNIE
78 from .theplatform import ThePlatformIE
79 from .vessel import VesselIE
80 from .kaltura import KalturaIE
81 from .eagleplatform import EaglePlatformIE
82 from .facebook import FacebookIE
83 from .soundcloud import SoundcloudIE
84 from .tunein import TuneInBaseIE
85 from .vbox7 import Vbox7IE
86 from .dbtv import DBTVIE
87 from .piksel import PikselIE
88 from .videa import VideaIE
89 from .twentymin import TwentyMinutenIE
90 from .ustream import UstreamIE
91 from .openload import OpenloadIE
92 from .videopress import VideoPressIE
93 from .rutube import RutubeIE
94 from .limelight import LimelightBaseIE
95 from .anvato import AnvatoIE
96 from .washingtonpost import WashingtonPostIE
97 from .wistia import WistiaIE
98 from .mediaset import MediasetIE
99 from .joj import JojIE
100 from .megaphone import MegaphoneIE
101 from .vzaar import VzaarIE
102 from .channel9 import Channel9IE
103 from .vshare import VShareIE
104 from .mediasite import MediasiteIE
105 from .springboardplatform import SpringboardPlatformIE
106 from .yapfiles import YapFilesIE
107 from .vice import ViceIE
108 from .xfileshare import XFileShareIE
109
110
111 class GenericIE(InfoExtractor):
112     IE_DESC = 'Generic downloader that works on some sites'
113     _VALID_URL = r'.*'
114     IE_NAME = 'generic'
115     _TESTS = [
116         # Direct link to a video
117         {
118             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
119             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
120             'info_dict': {
121                 'id': 'trailer',
122                 'ext': 'mp4',
123                 'title': 'trailer',
124                 'upload_date': '20100513',
125             }
126         },
127         # Direct link to media delivered compressed (until Accept-Encoding is *)
128         {
129             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
130             'md5': '128c42e68b13950268b648275386fc74',
131             'info_dict': {
132                 'id': 'FictionJunction-Parallel_Hearts',
133                 'ext': 'flac',
134                 'title': 'FictionJunction-Parallel_Hearts',
135                 'upload_date': '20140522',
136             },
137             'expected_warnings': [
138                 'URL could be a direct video link, returning it as such.'
139             ],
140             'skip': 'URL invalid',
141         },
142         # Direct download with broken HEAD
143         {
144             'url': 'http://ai-radio.org:8000/radio.opus',
145             'info_dict': {
146                 'id': 'radio',
147                 'ext': 'opus',
148                 'title': 'radio',
149             },
150             'params': {
151                 'skip_download': True,  # infinite live stream
152             },
153             'expected_warnings': [
154                 r'501.*Not Implemented',
155                 r'400.*Bad Request',
156             ],
157         },
158         # Direct link with incorrect MIME type
159         {
160             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
161             'md5': '4ccbebe5f36706d85221f204d7eb5913',
162             'info_dict': {
163                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
164                 'id': '5_Lennart_Poettering_-_Systemd',
165                 'ext': 'webm',
166                 'title': '5_Lennart_Poettering_-_Systemd',
167                 'upload_date': '20141120',
168             },
169             'expected_warnings': [
170                 'URL could be a direct video link, returning it as such.'
171             ]
172         },
173         # RSS feed
174         {
175             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
176             'info_dict': {
177                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
178                 'title': 'Zero Punctuation',
179                 'description': 're:.*groundbreaking video review series.*'
180             },
181             'playlist_mincount': 11,
182         },
183         # RSS feed with enclosure
184         {
185             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
186             'info_dict': {
187                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
188                 'ext': 'm4v',
189                 'upload_date': '20150228',
190                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
191             }
192         },
193         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
194         {
195             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
196             'info_dict': {
197                 'id': 'smil',
198                 'ext': 'mp4',
199                 'title': 'Automatics, robotics and biocybernetics',
200                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
201                 'upload_date': '20130627',
202                 'formats': 'mincount:16',
203                 'subtitles': 'mincount:1',
204             },
205             'params': {
206                 'force_generic_extractor': True,
207                 'skip_download': True,
208             },
209         },
210         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
211         {
212             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
213             'info_dict': {
214                 'id': 'hds',
215                 'ext': 'flv',
216                 'title': 'hds',
217                 'formats': 'mincount:1',
218             },
219             'params': {
220                 'skip_download': True,
221             },
222         },
223         # SMIL from https://www.restudy.dk/video/play/id/1637
224         {
225             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
226             'info_dict': {
227                 'id': 'video_1637',
228                 'ext': 'flv',
229                 'title': 'video_1637',
230                 'formats': 'mincount:3',
231             },
232             'params': {
233                 'skip_download': True,
234             },
235         },
236         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
237         {
238             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
239             'info_dict': {
240                 'id': 'smil-service',
241                 'ext': 'flv',
242                 'title': 'smil-service',
243                 'formats': 'mincount:1',
244             },
245             'params': {
246                 'skip_download': True,
247             },
248         },
249         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
250         {
251             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
252             'info_dict': {
253                 'id': '4719370',
254                 'ext': 'mp4',
255                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
256                 'formats': 'mincount:3',
257             },
258             'params': {
259                 'skip_download': True,
260             },
261         },
262         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
263         {
264             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
265             'info_dict': {
266                 'id': 'mZlp2ctYIUEB',
267                 'ext': 'mp4',
268                 'title': 'Tikibad ontruimd wegens brand',
269                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
270                 'thumbnail': r're:^https?://.*\.jpg$',
271                 'duration': 33,
272             },
273             'params': {
274                 'skip_download': True,
275             },
276         },
277         # MPD from http://dash-mse-test.appspot.com/media.html
278         {
279             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
280             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
281             'info_dict': {
282                 'id': 'car-20120827-manifest',
283                 'ext': 'mp4',
284                 'title': 'car-20120827-manifest',
285                 'formats': 'mincount:9',
286                 'upload_date': '20130904',
287             },
288             'params': {
289                 'format': 'bestvideo',
290             },
291         },
292         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
293         {
294             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
295             'info_dict': {
296                 'id': 'content',
297                 'ext': 'mp4',
298                 'title': 'content',
299                 'formats': 'mincount:8',
300             },
301             'params': {
302                 # m3u8 downloads
303                 'skip_download': True,
304             },
305             'skip': 'video gone',
306         },
307         # m3u8 served with Content-Type: text/plain
308         {
309             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
310             'info_dict': {
311                 'id': 'index',
312                 'ext': 'mp4',
313                 'title': 'index',
314                 'upload_date': '20140720',
315                 'formats': 'mincount:11',
316             },
317             'params': {
318                 # m3u8 downloads
319                 'skip_download': True,
320             },
321             'skip': 'video gone',
322         },
323         # google redirect
324         {
325             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
326             'info_dict': {
327                 'id': 'cmQHVoWB5FY',
328                 'ext': 'mp4',
329                 'upload_date': '20130224',
330                 'uploader_id': 'TheVerge',
331                 'description': r're:^Chris Ziegler takes a look at the\.*',
332                 'uploader': 'The Verge',
333                 'title': 'First Firefox OS phones side-by-side',
334             },
335             'params': {
336                 'skip_download': False,
337             }
338         },
339         {
340             # redirect in Refresh HTTP header
341             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
342             'info_dict': {
343                 'id': 'pO8h3EaFRdo',
344                 'ext': 'mp4',
345                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
346                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
347                 'upload_date': '20150917',
348                 'uploader_id': 'brtvofficial',
349                 'uploader': 'Boiler Room',
350             },
351             'params': {
352                 'skip_download': False,
353             },
354         },
355         {
356             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
357             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
358             'info_dict': {
359                 'id': '13601338388002',
360                 'ext': 'mp4',
361                 'uploader': 'www.hodiho.fr',
362                 'title': 'R\u00e9gis plante sa Jeep',
363             }
364         },
365         # bandcamp page with custom domain
366         {
367             'add_ie': ['Bandcamp'],
368             'url': 'http://bronyrock.com/track/the-pony-mash',
369             'info_dict': {
370                 'id': '3235767654',
371                 'ext': 'mp3',
372                 'title': 'The Pony Mash',
373                 'uploader': 'M_Pallante',
374             },
375             'skip': 'There is a limit of 200 free downloads / month for the test song',
376         },
377         {
378             # embedded brightcove video
379             # it also tests brightcove videos that need to set the 'Referer'
380             # in the http requests
381             'add_ie': ['BrightcoveLegacy'],
382             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
383             'info_dict': {
384                 'id': '2765128793001',
385                 'ext': 'mp4',
386                 'title': 'Le cours de bourse : l’analyse technique',
387                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
388                 'uploader': 'BFM BUSINESS',
389             },
390             'params': {
391                 'skip_download': True,
392             },
393         },
394         {
395             # embedded with itemprop embedURL and video id spelled as `idVideo`
396             'add_id': ['BrightcoveLegacy'],
397             'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
398             'info_dict': {
399                 'id': '5255628253001',
400                 'ext': 'mp4',
401                 'title': 'md5:37c519b1128915607601e75a87995fc0',
402                 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
403                 'uploader': 'BFM BUSINESS',
404                 'uploader_id': '876450612001',
405                 'timestamp': 1482255315,
406                 'upload_date': '20161220',
407             },
408             'params': {
409                 'skip_download': True,
410             },
411         },
412         {
413             # https://github.com/rg3/youtube-dl/issues/2253
414             'url': 'http://bcove.me/i6nfkrc3',
415             'md5': '0ba9446db037002366bab3b3eb30c88c',
416             'info_dict': {
417                 'id': '3101154703001',
418                 'ext': 'mp4',
419                 'title': 'Still no power',
420                 'uploader': 'thestar.com',
421                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
422             },
423             'add_ie': ['BrightcoveLegacy'],
424             'skip': 'video gone',
425         },
426         {
427             'url': 'http://www.championat.com/video/football/v/87/87499.html',
428             'md5': 'fb973ecf6e4a78a67453647444222983',
429             'info_dict': {
430                 'id': '3414141473001',
431                 'ext': 'mp4',
432                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
433                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
434                 'uploader': 'Championat',
435             },
436         },
437         {
438             # https://github.com/rg3/youtube-dl/issues/3541
439             'add_ie': ['BrightcoveLegacy'],
440             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
441             'info_dict': {
442                 'id': '3866516442001',
443                 'ext': 'mp4',
444                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
445                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
446                 'uploader': 'SBS Broadcasting',
447             },
448             'skip': 'Restricted to Netherlands',
449             'params': {
450                 'skip_download': True,  # m3u8 download
451             },
452         },
453         {
454             # Brightcove video in <iframe>
455             'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
456             'md5': '36d74ef5e37c8b4a2ce92880d208b968',
457             'info_dict': {
458                 'id': '5360463607001',
459                 'ext': 'mp4',
460                 'title': '叙利亚失明儿童在废墟上演唱《心跳》  呼吁获得正常童年生活',
461                 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
462                 'uploader': 'United Nations',
463                 'uploader_id': '1362235914001',
464                 'timestamp': 1489593889,
465                 'upload_date': '20170315',
466             },
467             'add_ie': ['BrightcoveLegacy'],
468         },
469         {
470             # Brightcove with alternative playerID key
471             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
472             'info_dict': {
473                 'id': 'nmeth.2062_SV1',
474                 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
475             },
476             'playlist': [{
477                 'info_dict': {
478                     'id': '2228375078001',
479                     'ext': 'mp4',
480                     'title': 'nmeth.2062-sv1',
481                     'description': 'nmeth.2062-sv1',
482                     'timestamp': 1363357591,
483                     'upload_date': '20130315',
484                     'uploader': 'Nature Publishing Group',
485                     'uploader_id': '1964492299001',
486                 },
487             }],
488         },
489         {
490             # Brightcove with UUID in videoPlayer
491             'url': 'http://www8.hp.com/cn/zh/home.html',
492             'info_dict': {
493                 'id': '5255815316001',
494                 'ext': 'mp4',
495                 'title': 'Sprocket Video - China',
496                 'description': 'Sprocket Video - China',
497                 'uploader': 'HP-Video Gallery',
498                 'timestamp': 1482263210,
499                 'upload_date': '20161220',
500                 'uploader_id': '1107601872001',
501             },
502             'params': {
503                 'skip_download': True,  # m3u8 download
504             },
505             'skip': 'video rotates...weekly?',
506         },
507         {
508             # Brightcove:new type [2].
509             'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
510             'md5': '2b35148fcf48da41c9fb4591650784f3',
511             'info_dict': {
512                 'id': '5348741021001',
513                 'ext': 'mp4',
514                 'upload_date': '20170306',
515                 'uploader_id': '4191638492001',
516                 'timestamp': 1488769918,
517                 'title': 'VIDEO:  St. Thomas More earns first trip to basketball semis',
518
519             },
520         },
521         {
522             # Alternative brightcove <video> attributes
523             'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
524             'info_dict': {
525                 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
526                 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
527             },
528             'playlist': [{
529                 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
530                 'info_dict': {
531                     'id': '5311302538001',
532                     'ext': 'mp4',
533                     'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
534                     'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
535                     'timestamp': 1486321708,
536                     'upload_date': '20170205',
537                     'uploader_id': '800000640001',
538                 },
539                 'only_matching': True,
540             }],
541         },
542         {
543             # Brightcove with UUID in videoPlayer
544             'url': 'http://www8.hp.com/cn/zh/home.html',
545             'info_dict': {
546                 'id': '5255815316001',
547                 'ext': 'mp4',
548                 'title': 'Sprocket Video - China',
549                 'description': 'Sprocket Video - China',
550                 'uploader': 'HP-Video Gallery',
551                 'timestamp': 1482263210,
552                 'upload_date': '20161220',
553                 'uploader_id': '1107601872001',
554             },
555             'params': {
556                 'skip_download': True,  # m3u8 download
557             },
558         },
559         # ooyala video
560         {
561             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
562             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
563             'info_dict': {
564                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
565                 'ext': 'mp4',
566                 'title': '2cc213299525360.mov',  # that's what we get
567                 'duration': 238.231,
568             },
569             'add_ie': ['Ooyala'],
570         },
571         {
572             # ooyala video embedded with http://player.ooyala.com/iframe.js
573             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
574             'info_dict': {
575                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
576                 'ext': 'mp4',
577                 'title': '"Steve Jobs: Man in the Machine" trailer',
578                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
579                 'duration': 135.427,
580             },
581             'params': {
582                 'skip_download': True,
583             },
584             'skip': 'movie expired',
585         },
586         # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
587         {
588             'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
589             'info_dict': {
590                 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
591                 'ext': 'mp4',
592                 'title': 'Steampunk Fest Comes to Honesdale',
593                 'duration': 43.276,
594             },
595             'params': {
596                 'skip_download': True,
597             }
598         },
599         # embed.ly video
600         {
601             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
602             'info_dict': {
603                 'id': '9ODmcdjQcHQ',
604                 'ext': 'mp4',
605                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
606                 'upload_date': '20140225',
607                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
608                 'uploader': 'Tested',
609                 'uploader_id': 'testedcom',
610             },
611             # No need to test YoutubeIE here
612             'params': {
613                 'skip_download': True,
614             },
615         },
616         # funnyordie embed
617         {
618             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
619             'info_dict': {
620                 'id': '18e820ec3f',
621                 'ext': 'mp4',
622                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
623                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
624             },
625             # HEAD requests lead to endless 301, while GET is OK
626             'expected_warnings': ['301'],
627         },
628         # RUTV embed
629         {
630             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
631             'info_dict': {
632                 'id': '776940',
633                 'ext': 'mp4',
634                 'title': 'Охотское море стало целиком российским',
635                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
636             },
637             'params': {
638                 # m3u8 download
639                 'skip_download': True,
640             },
641         },
642         # TVC embed
643         {
644             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
645             'info_dict': {
646                 'id': '55304',
647                 'ext': 'mp4',
648                 'title': 'Дошкольное воспитание',
649             },
650         },
651         # SportBox embed
652         {
653             'url': 'http://www.vestifinance.ru/articles/25753',
654             'info_dict': {
655                 'id': '25753',
656                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
657             },
658             'playlist': [{
659                 'info_dict': {
660                     'id': '370908',
661                     'title': 'Госзаказ. День 3',
662                     'ext': 'mp4',
663                 }
664             }, {
665                 'info_dict': {
666                     'id': '370905',
667                     'title': 'Госзаказ. День 2',
668                     'ext': 'mp4',
669                 }
670             }, {
671                 'info_dict': {
672                     'id': '370902',
673                     'title': 'Госзаказ. День 1',
674                     'ext': 'mp4',
675                 }
676             }],
677             'params': {
678                 # m3u8 download
679                 'skip_download': True,
680             },
681         },
682         # Myvi.ru embed
683         {
684             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
685             'info_dict': {
686                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
687                 'ext': 'mp4',
688                 'title': 'Ужастики, русский трейлер (2015)',
689                 'thumbnail': r're:^https?://.*\.jpg$',
690                 'duration': 153,
691             }
692         },
693         # XHamster embed
694         {
695             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
696             'info_dict': {
697                 'id': 'showthread',
698                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
699             },
700             'playlist_mincount': 7,
701             # This forum no longer allows <iframe> syntax;
702             # HTML tags are now displayed as-is
703             'skip': 'No videos on this page',
704         },
705         # Embedded TED video
706         {
707             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
708             'md5': '65fdff94098e4a607385a60c5177c638',
709             'info_dict': {
710                 'id': '1969',
711                 'ext': 'mp4',
712                 'title': 'Hidden miracles of the natural world',
713                 'uploader': 'Louie Schwartzberg',
714                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
715             }
716         },
717         # nowvideo embed hidden behind percent encoding
718         {
719             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
720             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
721             'info_dict': {
722                 'id': '06e53103ca9aa',
723                 'ext': 'flv',
724                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
725                 'description': 'No description',
726             },
727         },
728         # arte embed
729         {
730             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
731             'md5': '7653032cbb25bf6c80d80f217055fa43',
732             'info_dict': {
733                 'id': '048195-004_PLUS7-F',
734                 'ext': 'flv',
735                 'title': 'X:enius',
736                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
737                 'upload_date': '20140320',
738             },
739             'params': {
740                 'skip_download': 'Requires rtmpdump'
741             },
742             'skip': 'video gone',
743         },
744         # francetv embed
745         {
746             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
747             'info_dict': {
748                 'id': 'EV_30231',
749                 'ext': 'mp4',
750                 'title': 'Alcaline, le concert avec Calogero',
751                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
752                 'upload_date': '20150226',
753                 'timestamp': 1424989860,
754                 'duration': 5400,
755             },
756             'params': {
757                 # m3u8 downloads
758                 'skip_download': True,
759             },
760             'expected_warnings': [
761                 'Forbidden'
762             ]
763         },
764         # Condé Nast embed
765         {
766             'url': 'http://www.wired.com/2014/04/honda-asimo/',
767             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
768             'info_dict': {
769                 'id': '53501be369702d3275860000',
770                 'ext': 'mp4',
771                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
772             }
773         },
774         # Dailymotion embed
775         {
776             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
777             'md5': '441aeeb82eb72c422c7f14ec533999cd',
778             'info_dict': {
779                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
780                 'ext': 'mp4',
781                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
782                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
783                 'uploader': 'Spi0n',
784                 'uploader_id': 'xgditw',
785                 'upload_date': '20140425',
786                 'timestamp': 1398441542,
787             },
788             'add_ie': ['Dailymotion'],
789         },
790         # DailyMail embed
791         {
792             'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
793             'info_dict': {
794                 'id': '1495629',
795                 'ext': 'mp4',
796                 'title': 'Care worker punches elderly dementia patient in head 11 times',
797                 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
798             },
799             'add_ie': ['DailyMail'],
800             'params': {
801                 'skip_download': True,
802             },
803         },
804         # YouTube embed
805         {
806             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
807             'info_dict': {
808                 'id': 'FXRb4ykk4S0',
809                 'ext': 'mp4',
810                 'title': 'The NBL Auction 2014',
811                 'uploader': 'BADMINTON England',
812                 'uploader_id': 'BADMINTONEvents',
813                 'upload_date': '20140603',
814                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
815             },
816             'add_ie': ['Youtube'],
817             'params': {
818                 'skip_download': True,
819             }
820         },
821         # MTVServices embed
822         {
823             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
824             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
825             'info_dict': {
826                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
827                 'ext': 'mp4',
828                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
829                 'description': 'Two valets share their love for movie star Liam Neesons.',
830                 'timestamp': 1349922600,
831                 'upload_date': '20121011',
832             },
833         },
834         # YouTube embed via <data-embed-url="">
835         {
836             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
837             'info_dict': {
838                 'id': '4vAffPZIT44',
839                 'ext': 'mp4',
840                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
841                 'uploader': 'Gameloft',
842                 'uploader_id': 'gameloft',
843                 'upload_date': '20140828',
844                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
845             },
846             'params': {
847                 'skip_download': True,
848             }
849         },
850         # YouTube <object> embed
851         {
852             'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
853             'md5': '516718101ec834f74318df76259fb3cc',
854             'info_dict': {
855                 'id': 'msN87y-iEx0',
856                 'ext': 'webm',
857                 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
858                 'upload_date': '20080526',
859                 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
860                 'uploader': 'Christopher Sykes',
861                 'uploader_id': 'ChristopherJSykes',
862             },
863             'add_ie': ['Youtube'],
864         },
865         # Camtasia studio
866         {
867             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
868             'playlist': [{
869                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
870                 'info_dict': {
871                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
872                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
873                     'ext': 'flv',
874                     'duration': 2235.90,
875                 }
876             }, {
877                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
878                 'info_dict': {
879                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
880                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
881                     'ext': 'flv',
882                     'duration': 2235.93,
883                 }
884             }],
885             'info_dict': {
886                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
887             }
888         },
889         # Flowplayer
890         {
891             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
892             'md5': '9d65602bf31c6e20014319c7d07fba27',
893             'info_dict': {
894                 'id': '5123ea6d5e5a7',
895                 'ext': 'mp4',
896                 'age_limit': 18,
897                 'uploader': 'www.handjobhub.com',
898                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
899             }
900         },
901         # Multiple brightcove videos
902         # https://github.com/rg3/youtube-dl/issues/2283
903         {
904             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
905             'info_dict': {
906                 'id': 'always-never',
907                 'title': 'Always / Never - The New Yorker',
908             },
909             'playlist_count': 3,
910             'params': {
911                 'extract_flat': False,
912                 'skip_download': True,
913             }
914         },
915         # MLB embed
916         {
917             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
918             'md5': '96f09a37e44da40dd083e12d9a683327',
919             'info_dict': {
920                 'id': '33322633',
921                 'ext': 'mp4',
922                 'title': 'Ump changes call to ball',
923                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
924                 'duration': 48,
925                 'timestamp': 1401537900,
926                 'upload_date': '20140531',
927                 'thumbnail': r're:^https?://.*\.jpg$',
928             },
929         },
930         # Wistia embed
931         {
932             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
933             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
934             'info_dict': {
935                 'id': '6e2wtrbdaf',
936                 'ext': 'mov',
937                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
938                 'description': 'a Paywall Videos video from Remilon',
939                 'duration': 644.072,
940                 'uploader': 'study.com',
941                 'timestamp': 1459678540,
942                 'upload_date': '20160403',
943                 'filesize': 24687186,
944             },
945         },
946         {
947             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
948             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
949             'info_dict': {
950                 'id': 'uxjb0lwrcz',
951                 'ext': 'mp4',
952                 'title': 'Conversation about Hexagonal Rails Part 1',
953                 'description': 'a Martin Fowler video from ThoughtWorks',
954                 'duration': 1715.0,
955                 'uploader': 'thoughtworks.wistia.com',
956                 'timestamp': 1401832161,
957                 'upload_date': '20140603',
958             },
959         },
960         # Wistia standard embed (async)
961         {
962             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
963             'info_dict': {
964                 'id': '807fafadvk',
965                 'ext': 'mp4',
966                 'title': 'Drip Brennan Dunn Workshop',
967                 'description': 'a JV Webinars video from getdrip-1',
968                 'duration': 4986.95,
969                 'timestamp': 1463607249,
970                 'upload_date': '20160518',
971             },
972             'params': {
973                 'skip_download': True,
974             }
975         },
976         # Soundcloud embed
977         {
978             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
979             'info_dict': {
980                 'id': '174391317',
981                 'ext': 'mp3',
982                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
983                 'uploader': 'Sophos Security',
984                 'title': 'Chet Chat 171 - Oct 29, 2014',
985                 'upload_date': '20141029',
986             }
987         },
988         # Soundcloud multiple embeds
989         {
990             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
991             'info_dict': {
992                 'id': '52809',
993                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
994             },
995             'playlist_mincount': 7,
996         },
997         # TuneIn station embed
998         {
999             'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
1000             'info_dict': {
1001                 'id': '204146',
1002                 'ext': 'mp3',
1003                 'title': 'CNRV',
1004                 'location': 'Paris, France',
1005                 'is_live': True,
1006             },
1007             'params': {
1008                 # Live stream
1009                 'skip_download': True,
1010             },
1011         },
1012         # Livestream embed
1013         {
1014             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
1015             'info_dict': {
1016                 'id': '67864563',
1017                 'ext': 'flv',
1018                 'upload_date': '20141112',
1019                 'title': 'Rosetta #CometLanding webcast HL 10',
1020             }
1021         },
1022         # Another Livestream embed, without 'new.' in URL
1023         {
1024             'url': 'https://www.freespeech.org/',
1025             'info_dict': {
1026                 'id': '123537347',
1027                 'ext': 'mp4',
1028                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
1029             },
1030             'params': {
1031                 # Live stream
1032                 'skip_download': True,
1033             },
1034         },
1035         # LazyYT
1036         {
1037             'url': 'https://skiplagged.com/',
1038             'info_dict': {
1039                 'id': 'skiplagged',
1040                 'title': 'Skiplagged: The smart way to find cheap flights',
1041             },
1042             'playlist_mincount': 1,
1043             'add_ie': ['Youtube'],
1044         },
1045         # Cinchcast embed
1046         {
1047             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
1048             'info_dict': {
1049                 'id': '7141703',
1050                 'ext': 'mp3',
1051                 'upload_date': '20141126',
1052                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
1053             }
1054         },
1055         # Cinerama player
1056         {
1057             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
1058             'info_dict': {
1059                 'id': '730m_DandD_1901_512k',
1060                 'ext': 'mp4',
1061                 'uploader': 'www.abc.net.au',
1062                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
1063             }
1064         },
1065         # embedded viddler video
1066         {
1067             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
1068             'info_dict': {
1069                 'id': '4d03aad9',
1070                 'ext': 'mp4',
1071                 'uploader': 'deadspin',
1072                 'title': 'WALL-TO-GORTAT',
1073                 'timestamp': 1422285291,
1074                 'upload_date': '20150126',
1075             },
1076             'add_ie': ['Viddler'],
1077         },
1078         # Libsyn embed
1079         {
1080             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
1081             'info_dict': {
1082                 'id': '3377616',
1083                 'ext': 'mp3',
1084                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
1085                 'description': 'md5:601cb790edd05908957dae8aaa866465',
1086                 'upload_date': '20150220',
1087             },
1088             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
1089         },
1090         # jwplayer YouTube
1091         {
1092             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
1093             'info_dict': {
1094                 'id': 'Mrj4DVp2zeA',
1095                 'ext': 'mp4',
1096                 'upload_date': '20150212',
1097                 'uploader': 'The National Archives UK',
1098                 'description': 'md5:8078af856dca76edc42910b61273dbbf',
1099                 'uploader_id': 'NationalArchives08',
1100                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
1101             },
1102         },
1103         # jwplayer rtmp
1104         {
1105             'url': 'http://www.suffolk.edu/sjc/live.php',
1106             'info_dict': {
1107                 'id': 'live',
1108                 'ext': 'flv',
1109                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
1110                 'uploader': 'www.suffolk.edu',
1111             },
1112             'params': {
1113                 'skip_download': True,
1114             },
1115             'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
1116         },
1117         # Complex jwplayer
1118         {
1119             'url': 'http://www.indiedb.com/games/king-machine/videos',
1120             'info_dict': {
1121                 'id': 'videos',
1122                 'ext': 'mp4',
1123                 'title': 'king machine trailer 1',
1124                 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
1125                 'thumbnail': r're:^https?://.*\.jpg$',
1126             },
1127         },
1128         {
1129             # JWPlayer config passed as variable
1130             'url': 'http://www.txxx.com/videos/3326530/ariele/',
1131             'info_dict': {
1132                 'id': '3326530_hq',
1133                 'ext': 'mp4',
1134                 'title': 'ARIELE | Tube Cup',
1135                 'uploader': 'www.txxx.com',
1136                 'age_limit': 18,
1137             },
1138             'params': {
1139                 'skip_download': True,
1140             }
1141         },
1142         {
1143             # JWPlatform iframe
1144             'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
1145             'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
1146             'info_dict': {
1147                 'id': 'O0c5JcKT',
1148                 'ext': 'mp4',
1149                 'upload_date': '20171122',
1150                 'timestamp': 1511366290,
1151                 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
1152             },
1153             'add_ie': [JWPlatformIE.ie_key()],
1154         },
1155         {
1156             # Video.js embed, multiple formats
1157             'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
1158             'info_dict': {
1159                 'id': 'yygqldloqIk',
1160                 'ext': 'mp4',
1161                 'title': 'SolidWorks. Урок 6 Настройка чертежа',
1162                 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
1163                 'upload_date': '20130314',
1164                 'uploader': 'PROстое3D',
1165                 'uploader_id': 'PROstoe3D',
1166             },
1167             'params': {
1168                 'skip_download': True,
1169             },
1170         },
1171         {
1172             # Video.js embed, single format
1173             'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
1174             'info_dict': {
1175                 'id': 'watch',
1176                 'ext': 'mp4',
1177                 'title': 'Step 1 -  Good Foundation',
1178                 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
1179             },
1180             'params': {
1181                 'skip_download': True,
1182             },
1183         },
1184         # rtl.nl embed
1185         {
1186             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1187             'playlist_mincount': 5,
1188             'info_dict': {
1189                 'id': 'aanslagen-kopenhagen',
1190                 'title': 'Aanslagen Kopenhagen',
1191             }
1192         },
1193         # Zapiks embed
1194         {
1195             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1196             'info_dict': {
1197                 'id': '118046',
1198                 'ext': 'mp4',
1199                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1200             }
1201         },
1202         # Kaltura embed (different embed code)
1203         {
1204             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1205             'info_dict': {
1206                 'id': '1_a52wc67y',
1207                 'ext': 'flv',
1208                 'upload_date': '20150127',
1209                 'uploader_id': 'PremierMedia',
1210                 'timestamp': int,
1211                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1212             },
1213         },
1214         # Kaltura embed with single quotes
1215         {
1216             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1217             'info_dict': {
1218                 'id': '0_izeg5utt',
1219                 'ext': 'mp4',
1220                 'title': '35871',
1221                 'timestamp': 1355743100,
1222                 'upload_date': '20121217',
1223                 'uploader_id': 'cplapp@learn360.com',
1224             },
1225             'add_ie': ['Kaltura'],
1226         },
1227         {
1228             # Kaltura embedded via quoted entry_id
1229             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1230             'info_dict': {
1231                 'id': '0_utuok90b',
1232                 'ext': 'mp4',
1233                 'title': '06_matthew_brender_raj_dutt',
1234                 'timestamp': 1466638791,
1235                 'upload_date': '20160622',
1236             },
1237             'add_ie': ['Kaltura'],
1238             'expected_warnings': [
1239                 'Could not send HEAD request'
1240             ],
1241             'params': {
1242                 'skip_download': True,
1243             }
1244         },
1245         {
1246             # Kaltura embedded, some fileExt broken (#11480)
1247             'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1248             'info_dict': {
1249                 'id': '1_sgtvehim',
1250                 'ext': 'mp4',
1251                 'title': 'Our "Standard Models" of particle physics and cosmology',
1252                 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1253                 'timestamp': 1321158993,
1254                 'upload_date': '20111113',
1255                 'uploader_id': 'kps1',
1256             },
1257             'add_ie': ['Kaltura'],
1258         },
1259         {
1260             # Kaltura iframe embed
1261             'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1262             'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1263             'info_dict': {
1264                 'id': '0_f2cfbpwy',
1265                 'ext': 'mp4',
1266                 'title': 'I. M. Pei: A Centennial Celebration',
1267                 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1268                 'upload_date': '20170403',
1269                 'uploader_id': 'batchUser',
1270                 'timestamp': 1491232186,
1271             },
1272             'add_ie': ['Kaltura'],
1273         },
1274         {
1275             # meta twitter:player
1276             'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
1277             'info_dict': {
1278                 'id': '0_01b42zps',
1279                 'ext': 'mp4',
1280                 'title': 'Main Twerk (Video)',
1281                 'upload_date': '20171208',
1282                 'uploader_id': 'sebastian.salinas@thechive.com',
1283                 'timestamp': 1512713057,
1284             },
1285             'params': {
1286                 'skip_download': True,
1287             },
1288             'add_ie': ['Kaltura'],
1289         },
1290         # referrer protected EaglePlatform embed
1291         {
1292             'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
1293             'info_dict': {
1294                 'id': '582306',
1295                 'ext': 'mp4',
1296                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1297                 'thumbnail': r're:^https?://.*\.jpg$',
1298                 'duration': 3382,
1299                 'view_count': int,
1300             },
1301             'params': {
1302                 'skip_download': True,
1303             },
1304         },
1305         # ClipYou (EaglePlatform) embed (custom URL)
1306         {
1307             'url': 'http://muz-tv.ru/play/7129/',
1308             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1309             'info_dict': {
1310                 'id': '12820',
1311                 'ext': 'mp4',
1312                 'title': "'O Sole Mio",
1313                 'thumbnail': r're:^https?://.*\.jpg$',
1314                 'duration': 216,
1315                 'view_count': int,
1316             },
1317             'params': {
1318                 'skip_download': True,
1319             },
1320             'skip': 'This video is unavailable.',
1321         },
1322         # Pladform embed
1323         {
1324             'url': 'http://muz-tv.ru/kinozal/view/7400/',
1325             'info_dict': {
1326                 'id': '100183293',
1327                 'ext': 'mp4',
1328                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1329                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1330                 'thumbnail': r're:^https?://.*\.jpg$',
1331                 'duration': 694,
1332                 'age_limit': 0,
1333             },
1334             'skip': 'HTTP Error 404: Not Found',
1335         },
1336         # Playwire embed
1337         {
1338             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1339             'info_dict': {
1340                 'id': '3519514',
1341                 'ext': 'mp4',
1342                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1343                 'thumbnail': r're:^https?://.*\.png$',
1344                 'duration': 45.115,
1345             },
1346         },
1347         # 5min embed
1348         {
1349             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1350             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1351             'info_dict': {
1352                 'id': '518726732',
1353                 'ext': 'mp4',
1354                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1355                 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
1356                 'timestamp': 1427237531,
1357                 'uploader': 'Crunch Report',
1358                 'upload_date': '20150324',
1359             },
1360             'params': {
1361                 # m3u8 download
1362                 'skip_download': True,
1363             },
1364         },
1365         # SVT embed
1366         {
1367             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1368             'info_dict': {
1369                 'id': '2900353',
1370                 'ext': 'flv',
1371                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1372                 'duration': 27,
1373                 'age_limit': 0,
1374             },
1375         },
1376         # Crooks and Liars embed
1377         {
1378             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1379             'info_dict': {
1380                 'id': '8RUoRhRi',
1381                 'ext': 'mp4',
1382                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1383                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1384                 'timestamp': 1428207000,
1385                 'upload_date': '20150405',
1386                 'uploader': 'Heather',
1387             },
1388         },
1389         # Crooks and Liars external embed
1390         {
1391             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1392             'info_dict': {
1393                 'id': 'MTE3MjUtMzQ2MzA',
1394                 'ext': 'mp4',
1395                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1396                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1397                 'timestamp': 1265032391,
1398                 'upload_date': '20100201',
1399                 'uploader': 'Heather',
1400             },
1401         },
1402         # NBC Sports vplayer embed
1403         {
1404             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1405             'info_dict': {
1406                 'id': 'ln7x1qSThw4k',
1407                 'ext': 'flv',
1408                 'title': "PFT Live: New leader in the 'new-look' defense",
1409                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1410                 'uploader': 'NBCU-SPORTS',
1411                 'upload_date': '20140107',
1412                 'timestamp': 1389118457,
1413             },
1414             'skip': 'Invalid Page URL',
1415         },
1416         # NBC News embed
1417         {
1418             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1419             'md5': '1aa589c675898ae6d37a17913cf68d66',
1420             'info_dict': {
1421                 'id': 'x_dtl_oa_LettermanliftPR_160608',
1422                 'ext': 'mp4',
1423                 'title': 'David Letterman: A Preview',
1424                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1425                 'upload_date': '20160609',
1426                 'timestamp': 1465431544,
1427                 'uploader': 'NBCU-NEWS',
1428             },
1429         },
1430         # UDN embed
1431         {
1432             'url': 'https://video.udn.com/news/300346',
1433             'md5': 'fd2060e988c326991037b9aff9df21a6',
1434             'info_dict': {
1435                 'id': '300346',
1436                 'ext': 'mp4',
1437                 'title': '中一中男師變性 全校師生力挺',
1438                 'thumbnail': r're:^https?://.*\.jpg$',
1439             },
1440             'params': {
1441                 # m3u8 download
1442                 'skip_download': True,
1443             },
1444             'expected_warnings': ['Failed to parse JSON Expecting value'],
1445         },
1446         # Ooyala embed
1447         {
1448             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1449             'info_dict': {
1450                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1451                 'ext': 'mp4',
1452                 'description': 'Index/Match versus VLOOKUP.',
1453                 'title': 'This is what separates the Excel masters from the wannabes',
1454                 'duration': 191.933,
1455             },
1456             'params': {
1457                 # m3u8 downloads
1458                 'skip_download': True,
1459             }
1460         },
1461         # Brightcove URL in single quotes
1462         {
1463             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1464             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1465             'info_dict': {
1466                 'id': '4255764656001',
1467                 'ext': 'mp4',
1468                 'title': 'SN Presents: Russell Martin, World Citizen',
1469                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1470                 'uploader': 'Rogers Sportsnet',
1471                 'uploader_id': '1704050871',
1472                 'upload_date': '20150525',
1473                 'timestamp': 1432570283,
1474             },
1475         },
1476         # OnionStudios embed
1477         {
1478             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1479             'info_dict': {
1480                 'id': '2855',
1481                 'ext': 'mp4',
1482                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1483                 'thumbnail': r're:^https?://.*\.jpe?g$',
1484                 'uploader': 'ClickHole',
1485                 'uploader_id': 'clickhole',
1486             }
1487         },
1488         # SnagFilms embed
1489         {
1490             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1491             'info_dict': {
1492                 'id': '74849a00-85a9-11e1-9660-123139220831',
1493                 'ext': 'mp4',
1494                 'title': '#whilewewatch',
1495             }
1496         },
1497         # AdobeTVVideo embed
1498         {
1499             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1500             'md5': '43662b577c018ad707a63766462b1e87',
1501             'info_dict': {
1502                 'id': '2456',
1503                 'ext': 'mp4',
1504                 'title': 'New experience with Acrobat DC',
1505                 'description': 'New experience with Acrobat DC',
1506                 'duration': 248.667,
1507             },
1508         },
1509         # BrightcoveInPageEmbed embed
1510         {
1511             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1512             'info_dict': {
1513                 'id': '4238694884001',
1514                 'ext': 'flv',
1515                 'title': 'Tabletop: Dread, Last Thoughts',
1516                 'description': 'Tabletop: Dread, Last Thoughts',
1517                 'duration': 51690,
1518             },
1519         },
1520         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1521         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1522         {
1523             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1524             'info_dict': {
1525                 'id': '4785848093001',
1526                 'ext': 'mp4',
1527                 'title': 'The Cardinal Pell Interview',
1528                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1529                 'uploader': 'GlobeCast Australia - GlobeStream',
1530                 'uploader_id': '2733773828001',
1531                 'upload_date': '20160304',
1532                 'timestamp': 1457083087,
1533             },
1534             'params': {
1535                 # m3u8 downloads
1536                 'skip_download': True,
1537             },
1538         },
1539         {
1540             # Brightcove embed with whitespace around attribute names
1541             'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
1542             'info_dict': {
1543                 'id': '3167554373001',
1544                 'ext': 'mp4',
1545                 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
1546                 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
1547                 'uploader_id': '1079349493',
1548                 'upload_date': '20140207',
1549                 'timestamp': 1391810548,
1550             },
1551             'params': {
1552                 'skip_download': True,
1553             },
1554         },
1555         # Another form of arte.tv embed
1556         {
1557             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1558             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1559             'info_dict': {
1560                 'id': '030273-562_PLUS7-F',
1561                 'ext': 'mp4',
1562                 'title': 'ARTE Reportage - Nulle part, en France',
1563                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1564                 'upload_date': '20160409',
1565             },
1566         },
1567         # LiveLeak embed
1568         {
1569             'url': 'http://www.wykop.pl/link/3088787/',
1570             'md5': '7619da8c820e835bef21a1efa2a0fc71',
1571             'info_dict': {
1572                 'id': '874_1459135191',
1573                 'ext': 'mp4',
1574                 'title': 'Man shows poor quality of new apartment building',
1575                 'description': 'The wall is like a sand pile.',
1576                 'uploader': 'Lake8737',
1577             },
1578             'add_ie': [LiveLeakIE.ie_key()],
1579         },
1580         # Another LiveLeak embed pattern (#13336)
1581         {
1582             'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
1583             'info_dict': {
1584                 'id': '2eb_1496309988',
1585                 'ext': 'mp4',
1586                 'title': 'Thief robs place where everyone was armed',
1587                 'description': 'md5:694d73ee79e535953cf2488562288eee',
1588                 'uploader': 'brazilwtf',
1589             },
1590             'add_ie': [LiveLeakIE.ie_key()],
1591         },
1592         # Duplicated embedded video URLs
1593         {
1594             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1595             'info_dict': {
1596                 'id': '149298443_480_16c25b74_2',
1597                 'ext': 'mp4',
1598                 'title': 'vs. Blue Orange Spring Game',
1599                 'uploader': 'www.hudl.com',
1600             },
1601         },
1602         # twitter:player:stream embed
1603         {
1604             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1605             'info_dict': {
1606                 'id': 'master',
1607                 'ext': 'mp4',
1608                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1609                 'uploader': 'www.rtl.be',
1610             },
1611             'params': {
1612                 # m3u8 downloads
1613                 'skip_download': True,
1614             },
1615         },
1616         # twitter:player embed
1617         {
1618             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1619             'md5': 'a3e0df96369831de324f0778e126653c',
1620             'info_dict': {
1621                 'id': '4909620399001',
1622                 'ext': 'mp4',
1623                 'title': 'What Do Black Holes Sound Like?',
1624                 'description': 'what do black holes sound like',
1625                 'upload_date': '20160524',
1626                 'uploader_id': '29913724001',
1627                 'timestamp': 1464107587,
1628                 'uploader': 'TheAtlantic',
1629             },
1630             'add_ie': ['BrightcoveLegacy'],
1631         },
1632         # Facebook <iframe> embed
1633         {
1634             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1635             'md5': 'fbcde74f534176ecb015849146dd3aee',
1636             'info_dict': {
1637                 'id': '599637780109885',
1638                 'ext': 'mp4',
1639                 'title': 'Facebook video #599637780109885',
1640             },
1641         },
1642         # Facebook <iframe> embed, plugin video
1643         {
1644             'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
1645             'info_dict': {
1646                 'id': '1754168231264132',
1647                 'ext': 'mp4',
1648                 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
1649                 'uploader': 'Tariq Ramadan (official)',
1650                 'timestamp': 1496758379,
1651                 'upload_date': '20170606',
1652             },
1653             'params': {
1654                 'skip_download': True,
1655             },
1656         },
1657         # Facebook API embed
1658         {
1659             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1660             'md5': 'a47372ee61b39a7b90287094d447d94e',
1661             'info_dict': {
1662                 'id': '10153467542406923',
1663                 'ext': 'mp4',
1664                 'title': 'Facebook video #10153467542406923',
1665             },
1666         },
1667         # Wordpress "YouTube Video Importer" plugin
1668         {
1669             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1670             'md5': 'd16797741b560b485194eddda8121b48',
1671             'info_dict': {
1672                 'id': 'HNTXWDXV9Is',
1673                 'ext': 'mp4',
1674                 'title': 'Blue Devils Drumline Stanford lot 2016',
1675                 'upload_date': '20160627',
1676                 'uploader_id': 'GENOCIDE8GENERAL10',
1677                 'uploader': 'cylus cyrus',
1678             },
1679         },
1680         {
1681             # video stored on custom kaltura server
1682             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1683             'md5': '537617d06e64dfed891fa1593c4b30cc',
1684             'info_dict': {
1685                 'id': '0_1iotm5bh',
1686                 'ext': 'mp4',
1687                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1688                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1689                 'uploader_id': 'videos.expansion@el-mundo.net',
1690                 'upload_date': '20150429',
1691                 'timestamp': 1430303472,
1692             },
1693             'add_ie': ['Kaltura'],
1694         },
1695         {
1696             # Non-standard Vimeo embed
1697             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1698             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1699             'info_dict': {
1700                 'id': '148867247',
1701                 'ext': 'mp4',
1702                 'title': 'Understanding the web - Teaser',
1703                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1704                 'upload_date': '20151214',
1705                 'uploader': 'OpenClassrooms',
1706                 'uploader_id': 'openclassrooms',
1707             },
1708             'add_ie': ['Vimeo'],
1709         },
1710         {
1711             # generic vimeo embed that requires original URL passed as Referer
1712             'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1713             'only_matching': True,
1714         },
1715         {
1716             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1717             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1718             'info_dict': {
1719                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1720                 'ext': 'mp4',
1721                 'title': 'Big Buck Bunny',
1722                 'description': 'Royalty free test video',
1723                 'timestamp': 1432816365,
1724                 'upload_date': '20150528',
1725                 'is_live': False,
1726             },
1727             'params': {
1728                 'skip_download': True,
1729             },
1730             'add_ie': [ArkenaIE.ie_key()],
1731         },
1732         {
1733             'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1734             'info_dict': {
1735                 'id': '1c7141f46c',
1736                 'ext': 'mp4',
1737                 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1738             },
1739             'params': {
1740                 'skip_download': True,
1741             },
1742             'add_ie': [Vbox7IE.ie_key()],
1743         },
1744         {
1745             # DBTV embeds
1746             'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1747             'info_dict': {
1748                 'id': '43254897',
1749                 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1750             },
1751             'playlist_mincount': 3,
1752         },
1753         {
1754             # Videa embeds
1755             'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1756             'info_dict': {
1757                 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1758                 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1759             },
1760             'playlist_mincount': 2,
1761         },
1762         {
1763             # 20 minuten embed
1764             'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1765             'info_dict': {
1766                 'id': '523629',
1767                 'ext': 'mp4',
1768                 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1769                 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1770             },
1771             'params': {
1772                 'skip_download': True,
1773             },
1774             'add_ie': [TwentyMinutenIE.ie_key()],
1775         },
1776         {
1777             # VideoPress embed
1778             'url': 'https://en.support.wordpress.com/videopress/',
1779             'info_dict': {
1780                 'id': 'OcobLTqC',
1781                 'ext': 'm4v',
1782                 'title': 'IMG_5786',
1783                 'timestamp': 1435711927,
1784                 'upload_date': '20150701',
1785             },
1786             'params': {
1787                 'skip_download': True,
1788             },
1789             'add_ie': [VideoPressIE.ie_key()],
1790         },
1791         {
1792             # Rutube embed
1793             'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1794             'info_dict': {
1795                 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1796                 'ext': 'flv',
1797                 'title': 'Магаззино: Казань 2',
1798                 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1799                 'uploader': 'Магаззино',
1800                 'upload_date': '20170228',
1801                 'uploader_id': '996642',
1802             },
1803             'params': {
1804                 'skip_download': True,
1805             },
1806             'add_ie': [RutubeIE.ie_key()],
1807         },
1808         {
1809             # ThePlatform embedded with whitespaces in URLs
1810             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1811             'only_matching': True,
1812         },
1813         {
1814             # Senate ISVP iframe https
1815             'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1816             'md5': 'fb8c70b0b515e5037981a2492099aab8',
1817             'info_dict': {
1818                 'id': 'govtaff020316',
1819                 'ext': 'mp4',
1820                 'title': 'Integrated Senate Video Player',
1821             },
1822             'add_ie': [SenateISVPIE.ie_key()],
1823         },
1824         {
1825             # Limelight embeds (1 channel embed + 4 media embeds)
1826             'url': 'http://www.sedona.com/FacilitatorTraining2017',
1827             'info_dict': {
1828                 'id': 'FacilitatorTraining2017',
1829                 'title': 'Facilitator Training 2017',
1830             },
1831             'playlist_mincount': 5,
1832         },
1833         {
1834             # Limelight embed (LimelightPlayerUtil.embed)
1835             'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
1836             'info_dict': {
1837                 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
1838                 'ext': 'mp4',
1839                 'title': '07448641',
1840                 'timestamp': 1499890639,
1841                 'upload_date': '20170712',
1842             },
1843             'params': {
1844                 'skip_download': True,
1845             },
1846             'add_ie': ['LimelightMedia'],
1847         },
1848         {
1849             'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
1850             'info_dict': {
1851                 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
1852                 'title': 'Standoff with Walnut Creek murder suspect ends',
1853                 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
1854             },
1855             'playlist_mincount': 4,
1856         },
1857         {
1858             # WashingtonPost embed
1859             'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
1860             'info_dict': {
1861                 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
1862                 'ext': 'mp4',
1863                 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
1864                 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
1865                 'timestamp': 1455216756,
1866                 'uploader': 'The Washington Post',
1867                 'upload_date': '20160211',
1868             },
1869             'add_ie': [WashingtonPostIE.ie_key()],
1870         },
1871         {
1872             # Mediaset embed
1873             'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
1874             'info_dict': {
1875                 'id': '720642',
1876                 'ext': 'mp4',
1877                 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
1878             },
1879             'params': {
1880                 'skip_download': True,
1881             },
1882             'add_ie': [MediasetIE.ie_key()],
1883         },
1884         {
1885             # JOJ.sk embeds
1886             'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1887             'info_dict': {
1888                 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1889                 'title': 'Slovenskom sa prehnala vlna silných búrok',
1890             },
1891             'playlist_mincount': 5,
1892             'add_ie': [JojIE.ie_key()],
1893         },
1894         {
1895             # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
1896             'url': 'https://tvrain.ru/amp/418921/',
1897             'md5': 'cc00413936695987e8de148b67d14f1d',
1898             'info_dict': {
1899                 'id': '418921',
1900                 'ext': 'mp4',
1901                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1902             },
1903         },
1904         {
1905             # vzaar embed
1906             'url': 'http://help.vzaar.com/article/165-embedding-video',
1907             'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
1908             'info_dict': {
1909                 'id': '8707641',
1910                 'ext': 'mp4',
1911                 'title': 'Building A Business Online: Principal Chairs Q & A',
1912             },
1913         },
1914         {
1915             # multiple HTML5 videos on one page
1916             'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
1917             'info_dict': {
1918                 'id': 'keyscenarios',
1919                 'title': 'Rescue Kit 14 Free Edition - Getting started',
1920             },
1921             'playlist_count': 4,
1922         },
1923         {
1924             # vshare embed
1925             'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
1926             'md5': '17b39f55b5497ae8b59f5fbce8e35886',
1927             'info_dict': {
1928                 'id': '0f64ce6',
1929                 'title': 'vl14062007715967',
1930                 'ext': 'mp4',
1931             }
1932         },
1933         {
1934             'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
1935             'md5': 'aecd089f55b1cb5a59032cb049d3a356',
1936             'info_dict': {
1937                 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
1938                 'ext': 'mp4',
1939                 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
1940                 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
1941                 'timestamp': 1474354800,
1942                 'upload_date': '20160920',
1943             }
1944         },
1945         {
1946             'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
1947             'info_dict': {
1948                 'id': '1731611',
1949                 'ext': 'mp4',
1950                 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
1951                 'description': 'md5:eb5f23826a027ba95277d105f248b825',
1952                 'timestamp': 1516100691,
1953                 'upload_date': '20180116',
1954             },
1955             'params': {
1956                 'skip_download': True,
1957             },
1958             'add_ie': [SpringboardPlatformIE.ie_key()],
1959         },
1960         {
1961             'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
1962             'info_dict': {
1963                 'id': 'uPDB5I9wfp8',
1964                 'ext': 'webm',
1965                 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
1966                 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
1967                 'upload_date': '20160219',
1968                 'uploader': 'Pocoyo - Português (BR)',
1969                 'uploader_id': 'PocoyoBrazil',
1970             },
1971             'add_ie': [YoutubeIE.ie_key()],
1972             'params': {
1973                 'skip_download': True,
1974             },
1975         },
1976         {
1977             'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
1978             'info_dict': {
1979                 'id': 'vMDE4NzI1Mjgt690b',
1980                 'ext': 'mp4',
1981                 'title': 'Котята',
1982             },
1983             'add_ie': [YapFilesIE.ie_key()],
1984             'params': {
1985                 'skip_download': True,
1986             },
1987         },
1988         {
1989             'url': 'http://share-videos.se/auto/video/83645793?uid=13',
1990             'md5': 'b68d276de422ab07ee1d49388103f457',
1991             'info_dict': {
1992                 'id': '83645793',
1993                 'title': 'Lock up and get excited',
1994                 'ext': 'mp4'
1995             },
1996             'skip': 'TODO: fix nested playlists processing in tests',
1997         },
1998         # {
1999         #     # TODO: find another test
2000         #     # http://schema.org/VideoObject
2001         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
2002         #     'md5': '888dcf08b7ea671381f00fab74692755',
2003         #     'info_dict': {
2004         #         'id': 'nyvTSJMKId',
2005         #         'ext': 'mp4',
2006         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
2007         #         'description': '#love for cats.',
2008         #         'timestamp': 1461244995,
2009         #         'upload_date': '20160421',
2010         #     },
2011         #     'params': {
2012         #         'force_generic_extractor': True,
2013         #     },
2014         # }
2015     ]
2016
2017     def report_following_redirect(self, new_url):
2018         """Report information extraction."""
2019         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
2020
2021     def _extract_rss(self, url, video_id, doc):
2022         playlist_title = doc.find('./channel/title').text
2023         playlist_desc_el = doc.find('./channel/description')
2024         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
2025
2026         entries = []
2027         for it in doc.findall('./channel/item'):
2028             next_url = xpath_text(it, 'link', fatal=False)
2029             if not next_url:
2030                 enclosure_nodes = it.findall('./enclosure')
2031                 for e in enclosure_nodes:
2032                     next_url = e.attrib.get('url')
2033                     if next_url:
2034                         break
2035
2036             if not next_url:
2037                 continue
2038
2039             entries.append({
2040                 '_type': 'url_transparent',
2041                 'url': next_url,
2042                 'title': it.find('title').text,
2043             })
2044
2045         return {
2046             '_type': 'playlist',
2047             'id': url,
2048             'title': playlist_title,
2049             'description': playlist_desc,
2050             'entries': entries,
2051         }
2052
2053     def _extract_camtasia(self, url, video_id, webpage):
2054         """ Returns None if no camtasia video can be found. """
2055
2056         camtasia_cfg = self._search_regex(
2057             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
2058             webpage, 'camtasia configuration file', default=None)
2059         if camtasia_cfg is None:
2060             return None
2061
2062         title = self._html_search_meta('DC.title', webpage, fatal=True)
2063
2064         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
2065         camtasia_cfg = self._download_xml(
2066             camtasia_url, video_id,
2067             note='Downloading camtasia configuration',
2068             errnote='Failed to download camtasia configuration')
2069         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
2070
2071         entries = []
2072         for n in fileset_node.getchildren():
2073             url_n = n.find('./uri')
2074             if url_n is None:
2075                 continue
2076
2077             entries.append({
2078                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
2079                 'title': '%s - %s' % (title, n.tag),
2080                 'url': compat_urlparse.urljoin(url, url_n.text),
2081                 'duration': float_or_none(n.find('./duration').text),
2082             })
2083
2084         return {
2085             '_type': 'playlist',
2086             'entries': entries,
2087             'title': title,
2088         }
2089
2090     def _real_extract(self, url):
2091         if url.startswith('//'):
2092             return {
2093                 '_type': 'url',
2094                 'url': self.http_scheme() + url,
2095             }
2096
2097         parsed_url = compat_urlparse.urlparse(url)
2098         if not parsed_url.scheme:
2099             default_search = self._downloader.params.get('default_search')
2100             if default_search is None:
2101                 default_search = 'fixup_error'
2102
2103             if default_search in ('auto', 'auto_warning', 'fixup_error'):
2104                 if '/' in url:
2105                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
2106                     return self.url_result('http://' + url)
2107                 elif default_search != 'fixup_error':
2108                     if default_search == 'auto_warning':
2109                         if re.match(r'^(?:url|URL)$', url):
2110                             raise ExtractorError(
2111                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
2112                                 expected=True)
2113                         else:
2114                             self._downloader.report_warning(
2115                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
2116                     return self.url_result('ytsearch:' + url)
2117
2118             if default_search in ('error', 'fixup_error'):
2119                 raise ExtractorError(
2120                     '%r is not a valid URL. '
2121                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
2122                     % (url, url), expected=True)
2123             else:
2124                 if ':' not in default_search:
2125                     default_search += ':'
2126                 return self.url_result(default_search + url)
2127
2128         url, smuggled_data = unsmuggle_url(url)
2129         force_videoid = None
2130         is_intentional = smuggled_data and smuggled_data.get('to_generic')
2131         if smuggled_data and 'force_videoid' in smuggled_data:
2132             force_videoid = smuggled_data['force_videoid']
2133             video_id = force_videoid
2134         else:
2135             video_id = self._generic_id(url)
2136
2137         self.to_screen('%s: Requesting header' % video_id)
2138
2139         head_req = HEADRequest(url)
2140         head_response = self._request_webpage(
2141             head_req, video_id,
2142             note=False, errnote='Could not send HEAD request to %s' % url,
2143             fatal=False)
2144
2145         if head_response is not False:
2146             # Check for redirect
2147             new_url = compat_str(head_response.geturl())
2148             if url != new_url:
2149                 self.report_following_redirect(new_url)
2150                 if force_videoid:
2151                     new_url = smuggle_url(
2152                         new_url, {'force_videoid': force_videoid})
2153                 return self.url_result(new_url)
2154
2155         full_response = None
2156         if head_response is False:
2157             request = sanitized_Request(url)
2158             request.add_header('Accept-Encoding', '*')
2159             full_response = self._request_webpage(request, video_id)
2160             head_response = full_response
2161
2162         info_dict = {
2163             'id': video_id,
2164             'title': self._generic_title(url),
2165             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
2166         }
2167
2168         # Check for direct link to a video
2169         content_type = head_response.headers.get('Content-Type', '').lower()
2170         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
2171         if m:
2172             format_id = compat_str(m.group('format_id'))
2173             if format_id.endswith('mpegurl'):
2174                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
2175             elif format_id == 'f4m':
2176                 formats = self._extract_f4m_formats(url, video_id)
2177             else:
2178                 formats = [{
2179                     'format_id': format_id,
2180                     'url': url,
2181                     'vcodec': 'none' if m.group('type') == 'audio' else None
2182                 }]
2183                 info_dict['direct'] = True
2184             self._sort_formats(formats)
2185             info_dict['formats'] = formats
2186             return info_dict
2187
2188         if not self._downloader.params.get('test', False) and not is_intentional:
2189             force = self._downloader.params.get('force_generic_extractor', False)
2190             self._downloader.report_warning(
2191                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
2192
2193         if not full_response:
2194             request = sanitized_Request(url)
2195             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
2196             # making it impossible to download only a chunk of the file (yet we need only the first
2197             # 512 bytes to test whether it's HTML or not). With youtube-dl's default Accept-Encoding
2198             # that would always result in downloading the whole file, which is not desirable.
2199             # Therefore for the extraction pass we have to override Accept-Encoding to any in order
2200             # to accept raw bytes and be able to download only a chunk.
2201             # It would probably be better to solve this by checking Content-Type for application/octet-stream
2202             # after the HEAD request finishes, but it is not clear whether we can rely on this.
2203             request.add_header('Accept-Encoding', '*')
2204             full_response = self._request_webpage(request, video_id)
2205
2206         first_bytes = full_response.read(512)
2207
2208         # Is it an M3U playlist?
2209         if first_bytes.startswith(b'#EXTM3U'):
2210             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
2211             self._sort_formats(info_dict['formats'])
2212             return info_dict
2213
2214         # Maybe it's a direct link to a video?
2215         # Be careful not to download the whole thing!
2216         if not is_html(first_bytes):
2217             self._downloader.report_warning(
2218                 'URL could be a direct video link, returning it as such.')
2219             info_dict.update({
2220                 'direct': True,
2221                 'url': url,
2222             })
2223             return info_dict
2224
2225         webpage = self._webpage_read_content(
2226             full_response, url, video_id, prefix=first_bytes)
2227
2228         self.report_extraction(video_id)
2229
2230         # Is it an RSS feed, a SMIL file, an XSPF playlist or an ISM, MPD or F4M manifest?
2231         try:
2232             doc = compat_etree_fromstring(webpage.encode('utf-8'))
2233             if doc.tag == 'rss':
2234                 return self._extract_rss(url, video_id, doc)
2235             elif doc.tag == 'SmoothStreamingMedia':
2236                 info_dict['formats'] = self._parse_ism_formats(doc, url)
2237                 self._sort_formats(info_dict['formats'])
2238                 return info_dict
2239             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
2240                 smil = self._parse_smil(doc, url, video_id)
2241                 self._sort_formats(smil['formats'])
2242                 return smil
2243             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
2244                 return self.playlist_result(
2245                     self._parse_xspf(
2246                         doc, video_id, xspf_url=url,
2247                         xspf_base_url=compat_str(full_response.geturl())),
2248                     video_id)
2249             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
2250                 info_dict['formats'] = self._parse_mpd_formats(
2251                     doc,
2252                     mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
2253                     mpd_url=url)
2254                 self._sort_formats(info_dict['formats'])
2255                 return info_dict
2256             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
2257                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
2258                 self._sort_formats(info_dict['formats'])
2259                 return info_dict
2260         except compat_xml_parse_error:
2261             pass
2262
2263         # Is it a Camtasia project?
2264         camtasia_res = self._extract_camtasia(url, video_id, webpage)
2265         if camtasia_res is not None:
2266             return camtasia_res
2267
2268         # Sometimes the embedded video player is hidden behind percent encoding
2269         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
2270         # Unescaping the whole page allows handling those cases in a generic way
2271         webpage = compat_urllib_parse_unquote(webpage)
2272
2273         # it's tempting to parse this further, but you would
2274         # have to take into account all the variations like
2275         #   Video Title - Site Name
2276         #   Site Name | Video Title
2277         #   Video Title - Tagline | Site Name
2278         # and so on and so forth; it's just not practical
2279         video_title = self._og_search_title(
2280             webpage, default=None) or self._html_search_regex(
2281             r'(?s)<title>(.*?)</title>', webpage, 'video title',
2282             default='video')
2283
2284         # Try to detect age limit automatically
2285         age_limit = self._rta_search(webpage)
2286         # And then there are the jokers who advertise that they use RTA,
2287         # but actually don't.
2288         AGE_LIMIT_MARKERS = [
2289             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
2290         ]
2291         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
2292             age_limit = 18
2293
2294         # video uploader is domain name
2295         video_uploader = self._search_regex(
2296             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
2297
2298         video_description = self._og_search_description(webpage, default=None)
2299         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
2300
2301         info_dict.update({
2302             'title': video_title,
2303             'description': video_description,
2304             'thumbnail': video_thumbnail,
2305             'age_limit': age_limit,
2306         })
2307
2308         # Look for Brightcove Legacy Studio embeds
2309         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
2310         if bc_urls:
2311             entries = [{
2312                 '_type': 'url',
2313                 'url': smuggle_url(bc_url, {'Referer': url}),
2314                 'ie_key': 'BrightcoveLegacy'
2315             } for bc_url in bc_urls]
2316
2317             return {
2318                 '_type': 'playlist',
2319                 'title': video_title,
2320                 'id': video_id,
2321                 'entries': entries,
2322             }
2323
2324         # Look for Brightcove New Studio embeds
2325         bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
2326         if bc_urls:
2327             return self.playlist_from_matches(
2328                 bc_urls, video_id, video_title,
2329                 getter=lambda x: smuggle_url(x, {'referrer': url}),
2330                 ie='BrightcoveNew')
2331
2332         # Look for Nexx embeds
2333         nexx_urls = NexxIE._extract_urls(webpage)
2334         if nexx_urls:
2335             return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
2336
2337         # Look for Nexx iFrame embeds
2338         nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
2339         if nexx_embed_urls:
2340             return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
2341
2342         # Look for ThePlatform embeds
2343         tp_urls = ThePlatformIE._extract_urls(webpage)
2344         if tp_urls:
2345             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
2346
2347         # Look for Vessel embeds
2348         vessel_urls = VesselIE._extract_urls(webpage)
2349         if vessel_urls:
2350             return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
2351
2352         # Look for embedded rtl.nl player
2353         matches = re.findall(
2354             r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
2355             webpage)
2356         if matches:
2357             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
2358
2359         vimeo_urls = VimeoIE._extract_urls(url, webpage)
2360         if vimeo_urls:
2361             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
2362
2363         vid_me_embed_url = self._search_regex(
2364             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
2365             webpage, 'vid.me embed', default=None)
2366         if vid_me_embed_url is not None:
2367             return self.url_result(vid_me_embed_url, 'Vidme')
2368
2369         # Look for YouTube embeds
2370         youtube_urls = YoutubeIE._extract_urls(webpage)
2371         if youtube_urls:
2372             return self.playlist_from_matches(
2373                 youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
2374
2375         matches = DailymotionIE._extract_urls(webpage)
2376         if matches:
2377             return self.playlist_from_matches(matches, video_id, video_title)
2378
2379         # Look for embedded Dailymotion playlist player (#3822)
2380         m = re.search(
2381             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
2382         if m:
2383             playlists = re.findall(
2384                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
2385             if playlists:
2386                 return self.playlist_from_matches(
2387                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
2388
2389         # Look for DailyMail embeds
2390         dailymail_urls = DailyMailIE._extract_urls(webpage)
2391         if dailymail_urls:
2392             return self.playlist_from_matches(
2393                 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
2394
2395         # Look for embedded Wistia player
2396         wistia_url = WistiaIE._extract_url(webpage)
2397         if wistia_url:
2398             return {
2399                 '_type': 'url_transparent',
2400                 'url': self._proto_relative_url(wistia_url),
2401                 'ie_key': WistiaIE.ie_key(),
2402                 'uploader': video_uploader,
2403             }
2404
2405         # Look for SVT player
2406         svt_url = SVTIE._extract_url(webpage)
2407         if svt_url:
2408             return self.url_result(svt_url, 'SVT')
2409
2410         # Look for Bandcamp pages with custom domain
2411         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2412         if mobj is not None:
2413             burl = unescapeHTML(mobj.group(1))
2414             # Don't set the extractor because it can be a track url or an album
2415             return self.url_result(burl)
2416
2417         # Look for embedded Vevo player
2418         mobj = re.search(
2419             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2420         if mobj is not None:
2421             return self.url_result(mobj.group('url'))
2422
2423         # Look for embedded Viddler player
2424         mobj = re.search(
2425             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2426             webpage)
2427         if mobj is not None:
2428             return self.url_result(mobj.group('url'))
2429
2430         # Look for NYTimes player
2431         mobj = re.search(
2432             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2433             webpage)
2434         if mobj is not None:
2435             return self.url_result(mobj.group('url'))
2436
2437         # Look for Libsyn player
2438         mobj = re.search(
2439             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2440         if mobj is not None:
2441             return self.url_result(mobj.group('url'))
2442
2443         # Look for Ooyala videos
2444         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
2445                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2446                 re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2447                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
2448                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
2449         if mobj is not None:
2450             embed_token = self._search_regex(
2451                 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2452                 webpage, 'ooyala embed token', default=None)
2453             return OoyalaIE._build_url_result(smuggle_url(
2454                 mobj.group('ec'), {
2455                     'domain': url,
2456                     'embed_token': embed_token,
2457                 }))
2458
2459         # Look for multiple Ooyala embeds on SBN network websites
2460         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2461         if mobj is not None:
2462             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2463             if embeds:
2464                 return self.playlist_from_matches(
2465                     embeds, video_id, video_title,
2466                     getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2467
2468         # Look for Aparat videos
2469         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2470         if mobj is not None:
2471             return self.url_result(mobj.group(1), 'Aparat')
2472
2473         # Look for MPORA videos
2474         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2475         if mobj is not None:
2476             return self.url_result(mobj.group(1), 'Mpora')
2477
2478         # Look for embedded NovaMov-based player
2479         mobj = re.search(
2480             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2481                     (?P<url>http://(?:(?:embed|www)\.)?
2482                         (?:novamov\.com|
2483                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
2484                            videoweed\.(?:es|com)|
2485                            movshare\.(?:net|sx|ag)|
2486                            divxstage\.(?:eu|net|ch|co|at|ag))
2487                         /embed\.php.+?)\1''', webpage)
2488         if mobj is not None:
2489             return self.url_result(mobj.group('url'))
2490
2491         # Look for embedded Facebook player
2492         facebook_urls = FacebookIE._extract_urls(webpage)
2493         if facebook_urls:
2494             return self.playlist_from_matches(facebook_urls, video_id, video_title)
2495
2496         # Look for embedded VK player
2497         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2498         if mobj is not None:
2499             return self.url_result(mobj.group('url'), 'VK')
2500
2501         # Look for embedded Odnoklassniki player
2502         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2503         if mobj is not None:
2504             return self.url_result(mobj.group('url'), 'Odnoklassniki')
2505
2506         # Look for embedded ivi player
2507         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2508         if mobj is not None:
2509             return self.url_result(mobj.group('url'), 'Ivi')
2510
2511         # Look for embedded Huffington Post player
2512         mobj = re.search(
2513             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2514         if mobj is not None:
2515             return self.url_result(mobj.group('url'), 'HuffPost')
2516
2517         # Look for embed.ly
2518         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2519         if mobj is not None:
2520             return self.url_result(mobj.group('url'))
2521         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2522         if mobj is not None:
2523             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2524
2525         # Look for funnyordie embed
2526         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2527         if matches:
2528             return self.playlist_from_matches(
2529                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
2530
2531         # Look for BBC iPlayer embed
2532         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2533         if matches:
2534             return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
2535
2536         # Look for embedded RUTV player
2537         rutv_url = RUTVIE._extract_url(webpage)
2538         if rutv_url:
2539             return self.url_result(rutv_url, 'RUTV')
2540
2541         # Look for embedded TVC player
2542         tvc_url = TVCIE._extract_url(webpage)
2543         if tvc_url:
2544             return self.url_result(tvc_url, 'TVC')
2545
2546         # Look for embedded SportBox player
2547         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2548         if sportbox_urls:
2549             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
2550
2551         # Look for embedded XHamster player
2552         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2553         if xhamster_urls:
2554             return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2555
2556         # Look for embedded TNAFlixNetwork player
2557         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2558         if tnaflix_urls:
2559             return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2560
2561         # Look for embedded PornHub player
2562         pornhub_urls = PornHubIE._extract_urls(webpage)
2563         if pornhub_urls:
2564             return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
2565
2566         # Look for embedded DrTuber player
2567         drtuber_urls = DrTuberIE._extract_urls(webpage)
2568         if drtuber_urls:
2569             return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
2570
2571         # Look for embedded RedTube player
2572         redtube_urls = RedTubeIE._extract_urls(webpage)
2573         if redtube_urls:
2574             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
2575
2576         # Look for embedded Tube8 player
2577         tube8_urls = Tube8IE._extract_urls(webpage)
2578         if tube8_urls:
2579             return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
2580
2581         # Look for embedded Tvigle player
2582         mobj = re.search(
2583             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2584         if mobj is not None:
2585             return self.url_result(mobj.group('url'), 'Tvigle')
2586
2587         # Look for embedded TED player
2588         mobj = re.search(
2589             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2590         if mobj is not None:
2591             return self.url_result(mobj.group('url'), 'TED')
2592
2593         # Look for embedded Ustream videos
2594         ustream_url = UstreamIE._extract_url(webpage)
2595         if ustream_url:
2596             return self.url_result(ustream_url, UstreamIE.ie_key())
2597
2598         # Look for embedded arte.tv player
2599         mobj = re.search(
2600             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2601             webpage)
2602         if mobj is not None:
2603             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2604
2605         # Look for embedded francetv player
2606         mobj = re.search(
2607             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2608             webpage)
2609         if mobj is not None:
2610             return self.url_result(mobj.group('url'))
2611
2612         # Look for embedded smotri.com player
2613         smotri_url = SmotriIE._extract_url(webpage)
2614         if smotri_url:
2615             return self.url_result(smotri_url, 'Smotri')
2616
2617         # Look for embedded Myvi.ru player
2618         myvi_url = MyviIE._extract_url(webpage)
2619         if myvi_url:
2620             return self.url_result(myvi_url)
2621
2622         # Look for embedded soundcloud player
2623         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2624         if soundcloud_urls:
2625             return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2626
2627         # Look for tunein player
2628         tunein_urls = TuneInBaseIE._extract_urls(webpage)
2629         if tunein_urls:
2630             return self.playlist_from_matches(tunein_urls, video_id, video_title)
2631
2632         # Look for embedded mtvservices player
2633         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2634         if mtvservices_url:
2635             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2636
2637         # Look for embedded yahoo player
2638         mobj = re.search(
2639             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2640             webpage)
2641         if mobj is not None:
2642             return self.url_result(mobj.group('url'), 'Yahoo')
2643
2644         # Look for embedded sbs.com.au player
2645         mobj = re.search(
2646             r'''(?x)
2647             (?:
2648                 <meta\s+property="og:video"\s+content=|
2649                 <iframe[^>]+?src=
2650             )
2651             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2652             webpage)
2653         if mobj is not None:
2654             return self.url_result(mobj.group('url'), 'SBS')
2655
2656         # Look for embedded Cinchcast player
2657         mobj = re.search(
2658             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2659             webpage)
2660         if mobj is not None:
2661             return self.url_result(mobj.group('url'), 'Cinchcast')
2662
2663         mobj = re.search(
2664             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2665             webpage)
2666         if not mobj:
2667             mobj = re.search(
2668                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2669                 webpage)
2670         if mobj is not None:
2671             return self.url_result(mobj.group('url'), 'MLB')
2672
2673         mobj = re.search(
2674             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2675             webpage)
2676         if mobj is not None:
2677             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2678
2679         mobj = re.search(
2680             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2681             webpage)
2682         if mobj is not None:
2683             return self.url_result(mobj.group('url'), 'Livestream')
2684
2685         # Look for Zapiks embed
2686         mobj = re.search(
2687             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2688         if mobj is not None:
2689             return self.url_result(mobj.group('url'), 'Zapiks')
2690
2691         # Look for Kaltura embeds
2692         kaltura_url = KalturaIE._extract_url(webpage)
2693         if kaltura_url:
2694             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2695
2696         # Look for EaglePlatform embeds
2697         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2698         if eagleplatform_url:
2699             return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
2700
2701         # Look for ClipYou (uses EaglePlatform) embeds
2702         mobj = re.search(
2703             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2704         if mobj is not None:
2705             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2706
2707         # Look for Pladform embeds
2708         pladform_url = PladformIE._extract_url(webpage)
2709         if pladform_url:
2710             return self.url_result(pladform_url)
2711
2712         # Look for Videomore embeds
2713         videomore_url = VideomoreIE._extract_url(webpage)
2714         if videomore_url:
2715             return self.url_result(videomore_url)
2716
2717         # Look for Webcaster embeds
2718         webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2719         if webcaster_url:
2720             return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2721
2722         # Look for Playwire embeds
2723         mobj = re.search(
2724             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2725         if mobj is not None:
2726             return self.url_result(mobj.group('url'))
2727
2728         # Look for 5min embeds
2729         mobj = re.search(
2730             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2731         if mobj is not None:
2732             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2733
2734         # Look for Crooks and Liars embeds
2735         mobj = re.search(
2736             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2737         if mobj is not None:
2738             return self.url_result(mobj.group('url'))
2739
2740         # Look for NBC Sports VPlayer embeds
2741         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2742         if nbc_sports_url:
2743             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2744
2745         # Look for NBC News embeds
2746         nbc_news_embed_url = re.search(
2747             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2748         if nbc_news_embed_url:
2749             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2750
2751         # Look for Google Drive embeds
2752         google_drive_url = GoogleDriveIE._extract_url(webpage)
2753         if google_drive_url:
2754             return self.url_result(google_drive_url, 'GoogleDrive')
2755
2756         # Look for UDN embeds
2757         mobj = re.search(
2758             r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2759         if mobj is not None:
2760             return self.url_result(
2761                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2762
2763         # Look for Senate ISVP iframe
2764         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2765         if senate_isvp_url:
2766             return self.url_result(senate_isvp_url, 'SenateISVP')
2767
2768         # Look for OnionStudios embeds
2769         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2770         if onionstudios_url:
2771             return self.url_result(onionstudios_url)
2772
2773         # Look for ViewLift embeds
2774         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2775         if viewlift_url:
2776             return self.url_result(viewlift_url)
2777
2778         # Look for JWPlatform embeds
2779         jwplatform_urls = JWPlatformIE._extract_urls(webpage)
2780         if jwplatform_urls:
2781             return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
2782
2783         # Look for Digiteka embeds
2784         digiteka_url = DigitekaIE._extract_url(webpage)
2785         if digiteka_url:
2786             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2787
2788         # Look for Arkena embeds
2789         arkena_url = ArkenaIE._extract_url(webpage)
2790         if arkena_url:
2791             return self.url_result(arkena_url, ArkenaIE.ie_key())
2792
2793         # Look for Piksel embeds
2794         piksel_url = PikselIE._extract_url(webpage)
2795         if piksel_url:
2796             return self.url_result(piksel_url, PikselIE.ie_key())
2797
2798         # Look for Limelight embeds
2799         limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
2800         if limelight_urls:
2801             return self.playlist_result(
2802                 limelight_urls, video_id, video_title, video_description)
2803
2804         # Look for Anvato embeds
2805         anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
2806         if anvato_urls:
2807             return self.playlist_result(
2808                 anvato_urls, video_id, video_title, video_description)
2809
2810         # Look for AdobeTVVideo embeds
2811         mobj = re.search(
2812             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2813             webpage)
2814         if mobj is not None:
2815             return self.url_result(
2816                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2817                 'AdobeTVVideo')
2818
2819         # Look for Vine embeds
2820         mobj = re.search(
2821             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2822             webpage)
2823         if mobj is not None:
2824             return self.url_result(
2825                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2826
2827         # Look for VODPlatform embeds
2828         mobj = re.search(
2829             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2830             webpage)
2831         if mobj is not None:
2832             return self.url_result(
2833                 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2834
2835         # Look for Mangomolo embeds
2836         mobj = re.search(
2837             r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2838                 (?:
2839                     video\?.*?\bid=(?P<video_id>\d+)|
2840                     index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2841                 ).+?)\1''', webpage)
2842         if mobj is not None:
2843             info = {
2844                 '_type': 'url_transparent',
2845                 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2846                 'title': video_title,
2847                 'description': video_description,
2848                 'thumbnail': video_thumbnail,
2849                 'uploader': video_uploader,
2850             }
2851             video_id = mobj.group('video_id')
2852             if video_id:
2853                 info.update({
2854                     'ie_key': 'MangomoloVideo',
2855                     'id': video_id,
2856                 })
2857             else:
2858                 info.update({
2859                     'ie_key': 'MangomoloLive',
2860                     'id': mobj.group('channel_id'),
2861                 })
2862             return info
2863
2864         # Look for Instagram embeds
2865         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2866         if instagram_embed_url is not None:
2867             return self.url_result(
2868                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2869
2870         # Look for LiveLeak embeds
2871         liveleak_urls = LiveLeakIE._extract_urls(webpage)
2872         if liveleak_urls:
2873             return self.playlist_from_matches(liveleak_urls, video_id, video_title)
2874
2875         # Look for 3Q SDN embeds
2876         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2877         if threeqsdn_url:
2878             return {
2879                 '_type': 'url_transparent',
2880                 'ie_key': ThreeQSDNIE.ie_key(),
2881                 'url': self._proto_relative_url(threeqsdn_url),
2882                 'title': video_title,
2883                 'description': video_description,
2884                 'thumbnail': video_thumbnail,
2885                 'uploader': video_uploader,
2886             }
2887
2888         # Look for VBOX7 embeds
2889         vbox7_url = Vbox7IE._extract_url(webpage)
2890         if vbox7_url:
2891             return self.url_result(vbox7_url, Vbox7IE.ie_key())
2892
2893         # Look for DBTV embeds
2894         dbtv_urls = DBTVIE._extract_urls(webpage)
2895         if dbtv_urls:
2896             return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
2897
2898         # Look for Videa embeds
2899         videa_urls = VideaIE._extract_urls(webpage)
2900         if videa_urls:
2901             return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
2902
2903         # Look for 20 minuten embeds
2904         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2905         if twentymin_urls:
2906             return self.playlist_from_matches(
2907                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
2908
2909         # Look for Openload embeds
2910         openload_urls = OpenloadIE._extract_urls(webpage)
2911         if openload_urls:
2912             return self.playlist_from_matches(
2913                 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
2914
2915         # Look for VideoPress embeds
2916         videopress_urls = VideoPressIE._extract_urls(webpage)
2917         if videopress_urls:
2918             return self.playlist_from_matches(
2919                 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
2920
2921         # Look for Rutube embeds
2922         rutube_urls = RutubeIE._extract_urls(webpage)
2923         if rutube_urls:
2924             return self.playlist_from_matches(
2925                 rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
2926
2927         # Look for WashingtonPost embeds
2928         wapo_urls = WashingtonPostIE._extract_urls(webpage)
2929         if wapo_urls:
2930             return self.playlist_from_matches(
2931                 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
2932
2933         # Look for Mediaset embeds
2934         mediaset_urls = MediasetIE._extract_urls(webpage)
2935         if mediaset_urls:
2936             return self.playlist_from_matches(
2937                 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
2938
2939         # Look for JOJ.sk embeds
2940         joj_urls = JojIE._extract_urls(webpage)
2941         if joj_urls:
2942             return self.playlist_from_matches(
2943                 joj_urls, video_id, video_title, ie=JojIE.ie_key())
2944
2945         # Look for megaphone.fm embeds
2946         mpfn_urls = MegaphoneIE._extract_urls(webpage)
2947         if mpfn_urls:
2948             return self.playlist_from_matches(
2949                 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
2950
2951         # Look for vzaar embeds
2952         vzaar_urls = VzaarIE._extract_urls(webpage)
2953         if vzaar_urls:
2954             return self.playlist_from_matches(
2955                 vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
2956
2957         channel9_urls = Channel9IE._extract_urls(webpage)
2958         if channel9_urls:
2959             return self.playlist_from_matches(
2960                 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
2961
2962         vshare_urls = VShareIE._extract_urls(webpage)
2963         if vshare_urls:
2964             return self.playlist_from_matches(
2965                 vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
2966
2967         # Look for Mediasite embeds
2968         mediasite_urls = MediasiteIE._extract_urls(webpage)
2969         if mediasite_urls:
2970             entries = [
2971                 self.url_result(smuggle_url(
2972                     compat_urlparse.urljoin(url, mediasite_url),
2973                     {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
2974                 for mediasite_url in mediasite_urls]
2975             return self.playlist_result(entries, video_id, video_title)
2976
2977         springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
2978         if springboardplatform_urls:
2979             return self.playlist_from_matches(
2980                 springboardplatform_urls, video_id, video_title,
2981                 ie=SpringboardPlatformIE.ie_key())
2982
2983         yapfiles_urls = YapFilesIE._extract_urls(webpage)
2984         if yapfiles_urls:
2985             return self.playlist_from_matches(
2986                 yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
2987
2988         vice_urls = ViceIE._extract_urls(webpage)
2989         if vice_urls:
2990             return self.playlist_from_matches(
2991                 vice_urls, video_id, video_title, ie=ViceIE.ie_key())
2992
2993         xfileshare_urls = XFileShareIE._extract_urls(webpage)
2994         if xfileshare_urls:
2995             return self.playlist_from_matches(
2996                 xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
2997
2998         sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
2999             r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
3000             webpage)]
3001         if sharevideos_urls:
3002             return self.playlist_from_matches(
3003                 sharevideos_urls, video_id, video_title)
3004
3005         def merge_dicts(dict1, dict2):
3006             merged = {}
3007             for k, v in dict1.items():
3008                 if v is not None:
3009                     merged[k] = v
3010             for k, v in dict2.items():
3011                 if v is None:
3012                     continue
3013                 if (k not in merged or
3014                         (isinstance(v, compat_str) and v and
3015                             isinstance(merged[k], compat_str) and
3016                             not merged[k])):
3017                     merged[k] = v
3018             return merged
3019
3020         # Look for HTML5 media
3021         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
3022         if entries:
3023             if len(entries) == 1:
3024                 entries[0].update({
3025                     'id': video_id,
3026                     'title': video_title,
3027                 })
3028             else:
3029                 for num, entry in enumerate(entries, start=1):
3030                     entry.update({
3031                         'id': '%s-%s' % (video_id, num),
3032                         'title': '%s (%d)' % (video_title, num),
3033                     })
3034             for entry in entries:
3035                 self._sort_formats(entry['formats'])
3036             return self.playlist_result(entries, video_id, video_title)
3037
3038         jwplayer_data = self._find_jwplayer_data(
3039             webpage, video_id, transform_source=js_to_json)
3040         if jwplayer_data:
3041             info = self._parse_jwplayer_data(
3042                 jwplayer_data, video_id, require_title=False, base_url=url)
3043             return merge_dicts(info, info_dict)
3044
3045         # Video.js embed
3046         mobj = re.search(
3047             r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
3048             webpage)
3049         if mobj is not None:
3050             sources = self._parse_json(
3051                 mobj.group(1), video_id, transform_source=js_to_json,
3052                 fatal=False) or []
3053             if not isinstance(sources, list):
3054                 sources = [sources]
3055             formats = []
3056             for source in sources:
3057                 src = source.get('src')
3058                 if not src or not isinstance(src, compat_str):
3059                     continue
3060                 src = compat_urlparse.urljoin(url, src)
3061                 src_type = source.get('type')
3062                 if isinstance(src_type, compat_str):
3063                     src_type = src_type.lower()
3064                 ext = determine_ext(src).lower()
3065                 if src_type == 'video/youtube':
3066                     return self.url_result(src, YoutubeIE.ie_key())
3067                 if src_type == 'application/dash+xml' or ext == 'mpd':
3068                     formats.extend(self._extract_mpd_formats(
3069                         src, video_id, mpd_id='dash', fatal=False))
3070                 elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
3071                     formats.extend(self._extract_m3u8_formats(
3072                         src, video_id, 'mp4', entry_protocol='m3u8_native',
3073                         m3u8_id='hls', fatal=False))
3074                 else:
3075                     formats.append({
3076                         'url': src,
3077                         'ext': (mimetype2ext(src_type) or
3078                                 ext if ext in KNOWN_EXTENSIONS else 'mp4'),
3079                     })
3080             if formats:
3081                 self._sort_formats(formats)
3082                 info_dict['formats'] = formats
3083                 return info_dict
3084
3085         # Looking for http://schema.org/VideoObject
3086         json_ld = self._search_json_ld(
3087             webpage, video_id, default={}, expected_type='VideoObject')
3088         if json_ld.get('url'):
3089             return merge_dicts(json_ld, info_dict)
3090
3091         def check_video(vurl):
3092             if YoutubeIE.suitable(vurl):
3093                 return True
3094             if RtmpIE.suitable(vurl):
3095                 return True
3096             vpath = compat_urlparse.urlparse(vurl).path
3097             vext = determine_ext(vpath)
3098             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
3099
3100         def filter_video(urls):
3101             return list(filter(check_video, urls))
3102
3103         # Start with something easy: JW Player in SWFObject
3104         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
3105         if not found:
3106             # Look for gorilla-vid style embedding
3107             found = filter_video(re.findall(r'''(?sx)
3108                 (?:
3109                     jw_plugins|
3110                     JWPlayerOptions|
3111                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
3112                 )
3113                 .*?
3114                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
3115         if not found:
3116             # Broaden the search a little bit
3117             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
3118         if not found:
3119             # Broaden the findall a little bit: JWPlayer JS loader
3120             found = filter_video(re.findall(
3121                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
3122         if not found:
3123             # Flow player
3124             found = filter_video(re.findall(r'''(?xs)
3125                 flowplayer\("[^"]+",\s*
3126                     \{[^}]+?\}\s*,
3127                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
3128                         ["']?url["']?\s*:\s*["']([^"']+)["']
3129             ''', webpage))
3130         if not found:
3131             # Cinerama player
3132             found = re.findall(
3133                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
3134         if not found:
3135             # Try to find twitter cards info
3136             # twitter:player:stream should be checked before twitter:player since
3137             # it is expected to contain a raw stream (see
3138             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3139             found = filter_video(re.findall(
3140                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
3141         if not found:
3142             # We look for Open Graph info:
3143             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
3144             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
3145             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
3146             if m_video_type is not None:
3147                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
3148         if not found:
3149             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
3150             found = re.search(
3151                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
3152                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
3153                 webpage)
3154             if not found:
3155                 # Look also in Refresh HTTP header
3156                 refresh_header = head_response.headers.get('Refresh')
3157                 if refresh_header:
3158                     # In python 2 response HTTP headers are bytestrings
3159                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
3160                         refresh_header = refresh_header.decode('iso-8859-1')
3161                     found = re.search(REDIRECT_REGEX, refresh_header)
3162             if found:
3163                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
3164                 if new_url != url:
3165                     self.report_following_redirect(new_url)
3166                     return {
3167                         '_type': 'url',
3168                         'url': new_url,
3169                     }
3170                 else:
3171                     found = None
3172
3173         if not found:
3174             # twitter:player is a https URL to iframe player that may or may not
3175             # be supported by youtube-dl thus this is checked the very last (see
3176             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3177             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
3178             if embed_url and embed_url != url:
3179                 return self.url_result(embed_url)
3180
3181         if not found:
3182             raise UnsupportedError(url)
3183
3184         entries = []
3185         for video_url in orderedSet(found):
3186             video_url = unescapeHTML(video_url)
3187             video_url = video_url.replace('\\/', '/')
3188             video_url = compat_urlparse.urljoin(url, video_url)
3189             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
3190
3191             # Sometimes, jwplayer extraction will result in a YouTube URL
3192             if YoutubeIE.suitable(video_url):
3193                 entries.append(self.url_result(video_url, 'Youtube'))
3194                 continue
3195
3196             # here's a fun little line of code for you:
3197             video_id = os.path.splitext(video_id)[0]
3198
3199             entry_info_dict = {
3200                 'id': video_id,
3201                 'uploader': video_uploader,
3202                 'title': video_title,
3203                 'age_limit': age_limit,
3204             }
3205
3206             if RtmpIE.suitable(video_url):
3207                 entry_info_dict.update({
3208                     '_type': 'url_transparent',
3209                     'ie_key': RtmpIE.ie_key(),
3210                     'url': video_url,
3211                 })
3212                 entries.append(entry_info_dict)
3213                 continue
3214
3215             ext = determine_ext(video_url)
3216             if ext == 'smil':
3217                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
3218             elif ext == 'xspf':
3219                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
3220             elif ext == 'm3u8':
3221                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
3222             elif ext == 'mpd':
3223                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
3224             elif ext == 'f4m':
3225                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
3226             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
3227                 # Just matching .ism/manifest is not enough to be reliably sure
3228                 # whether it's actually an ISM manifest or some other streaming
3229                 # manifest since there are various streaming URL formats
3230                 # possible (see [1]) as well as some other shenanigans like
3231                 # .smil/manifest URLs that actually serve an ISM (see [2]) and
3232                 # so on.
3233                 # Thus the most reasonable way to solve this is to delegate
3234                 # to generic extractor in order to look into the contents of
3235                 # the manifest itself.
3236                 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
3237                 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
3238                 entry_info_dict = self.url_result(
3239                     smuggle_url(video_url, {'to_generic': True}),
3240                     GenericIE.ie_key())
3241             else:
3242                 entry_info_dict['url'] = video_url
3243
3244             if entry_info_dict.get('formats'):
3245                 self._sort_formats(entry_info_dict['formats'])
3246
3247             entries.append(entry_info_dict)
3248
3249         if len(entries) == 1:
3250             return entries[0]
3251         else:
3252             for num, e in enumerate(entries, start=1):
3253                 # 'url' results don't have a title
3254                 if e.get('title') is not None:
3255                     e['title'] = '%s (%d)' % (e['title'], num)
3256             return {
3257                 '_type': 'playlist',
3258                 'entries': entries,
3259             }