Improve URL extraction
[youtube-dl] / youtube_dl / extractor / generic.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_str,
14     compat_urllib_parse_unquote,
15     compat_urlparse,
16     compat_xml_parse_error,
17 )
18 from ..utils import (
19     determine_ext,
20     ExtractorError,
21     float_or_none,
22     HEADRequest,
23     is_html,
24     js_to_json,
25     KNOWN_EXTENSIONS,
26     merge_dicts,
27     mimetype2ext,
28     orderedSet,
29     sanitized_Request,
30     smuggle_url,
31     unescapeHTML,
32     unified_strdate,
33     unsmuggle_url,
34     UnsupportedError,
35     url_or_none,
36     xpath_text,
37 )
38 from .commonprotocols import RtmpIE
39 from .brightcove import (
40     BrightcoveLegacyIE,
41     BrightcoveNewIE,
42 )
43 from .nexx import (
44     NexxIE,
45     NexxEmbedIE,
46 )
47 from .nbc import NBCSportsVPlayerIE
48 from .ooyala import OoyalaIE
49 from .rutv import RUTVIE
50 from .tvc import TVCIE
51 from .sportbox import SportBoxEmbedIE
52 from .smotri import SmotriIE
53 from .myvi import MyviIE
54 from .condenast import CondeNastIE
55 from .udn import UDNEmbedIE
56 from .senateisvp import SenateISVPIE
57 from .svt import SVTIE
58 from .pornhub import PornHubIE
59 from .xhamster import XHamsterEmbedIE
60 from .tnaflix import TNAFlixNetworkEmbedIE
61 from .drtuber import DrTuberIE
62 from .redtube import RedTubeIE
63 from .tube8 import Tube8IE
64 from .vimeo import VimeoIE
65 from .dailymotion import DailymotionIE
66 from .dailymail import DailyMailIE
67 from .onionstudios import OnionStudiosIE
68 from .viewlift import ViewLiftEmbedIE
69 from .mtv import MTVServicesEmbeddedIE
70 from .pladform import PladformIE
71 from .videomore import VideomoreIE
72 from .webcaster import WebcasterFeedIE
73 from .googledrive import GoogleDriveIE
74 from .jwplatform import JWPlatformIE
75 from .digiteka import DigitekaIE
76 from .arkena import ArkenaIE
77 from .instagram import InstagramIE
78 from .liveleak import LiveLeakIE
79 from .threeqsdn import ThreeQSDNIE
80 from .theplatform import ThePlatformIE
81 from .vessel import VesselIE
82 from .kaltura import KalturaIE
83 from .eagleplatform import EaglePlatformIE
84 from .facebook import FacebookIE
85 from .soundcloud import SoundcloudIE
86 from .tunein import TuneInBaseIE
87 from .vbox7 import Vbox7IE
88 from .dbtv import DBTVIE
89 from .piksel import PikselIE
90 from .videa import VideaIE
91 from .twentymin import TwentyMinutenIE
92 from .ustream import UstreamIE
93 from .openload import OpenloadIE
94 from .videopress import VideoPressIE
95 from .rutube import RutubeIE
96 from .limelight import LimelightBaseIE
97 from .anvato import AnvatoIE
98 from .washingtonpost import WashingtonPostIE
99 from .wistia import WistiaIE
100 from .mediaset import MediasetIE
101 from .joj import JojIE
102 from .megaphone import MegaphoneIE
103 from .vzaar import VzaarIE
104 from .channel9 import Channel9IE
105 from .vshare import VShareIE
106 from .mediasite import MediasiteIE
107 from .springboardplatform import SpringboardPlatformIE
108 from .yapfiles import YapFilesIE
109 from .vice import ViceIE
110 from .xfileshare import XFileShareIE
111 from .cloudflarestream import CloudflareStreamIE
112 from .peertube import PeerTubeIE
113 from .indavideo import IndavideoEmbedIE
114 from .apa import APAIE
115 from .foxnews import FoxNewsIE
116
117
118 class GenericIE(InfoExtractor):
119     IE_DESC = 'Generic downloader that works on some sites'
120     _VALID_URL = r'.*'
121     IE_NAME = 'generic'
122     _TESTS = [
123         # Direct link to a video
124         {
125             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
126             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
127             'info_dict': {
128                 'id': 'trailer',
129                 'ext': 'mp4',
130                 'title': 'trailer',
131                 'upload_date': '20100513',
132             }
133         },
134         # Direct link to media delivered compressed (until Accept-Encoding is *)
135         {
136             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
137             'md5': '128c42e68b13950268b648275386fc74',
138             'info_dict': {
139                 'id': 'FictionJunction-Parallel_Hearts',
140                 'ext': 'flac',
141                 'title': 'FictionJunction-Parallel_Hearts',
142                 'upload_date': '20140522',
143             },
144             'expected_warnings': [
145                 'URL could be a direct video link, returning it as such.'
146             ],
147             'skip': 'URL invalid',
148         },
149         # Direct download with broken HEAD
150         {
151             'url': 'http://ai-radio.org:8000/radio.opus',
152             'info_dict': {
153                 'id': 'radio',
154                 'ext': 'opus',
155                 'title': 'radio',
156             },
157             'params': {
158                 'skip_download': True,  # infinite live stream
159             },
160             'expected_warnings': [
161                 r'501.*Not Implemented',
162                 r'400.*Bad Request',
163             ],
164         },
165         # Direct link with incorrect MIME type
166         {
167             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
168             'md5': '4ccbebe5f36706d85221f204d7eb5913',
169             'info_dict': {
170                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
171                 'id': '5_Lennart_Poettering_-_Systemd',
172                 'ext': 'webm',
173                 'title': '5_Lennart_Poettering_-_Systemd',
174                 'upload_date': '20141120',
175             },
176             'expected_warnings': [
177                 'URL could be a direct video link, returning it as such.'
178             ]
179         },
180         # RSS feed
181         {
182             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
183             'info_dict': {
184                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
185                 'title': 'Zero Punctuation',
186                 'description': 're:.*groundbreaking video review series.*'
187             },
188             'playlist_mincount': 11,
189         },
190         # RSS feed with enclosure
191         {
192             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
193             'info_dict': {
194                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
195                 'ext': 'm4v',
196                 'upload_date': '20150228',
197                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
198             }
199         },
200         # RSS feed with enclosures and unsupported link URLs
201         {
202             'url': 'http://www.hellointernet.fm/podcast?format=rss',
203             'info_dict': {
204                 'id': 'http://www.hellointernet.fm/podcast?format=rss',
205                 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
206                 'title': 'Hello Internet',
207             },
208             'playlist_mincount': 100,
209         },
210         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
211         {
212             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
213             'info_dict': {
214                 'id': 'smil',
215                 'ext': 'mp4',
216                 'title': 'Automatics, robotics and biocybernetics',
217                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
218                 'upload_date': '20130627',
219                 'formats': 'mincount:16',
220                 'subtitles': 'mincount:1',
221             },
222             'params': {
223                 'force_generic_extractor': True,
224                 'skip_download': True,
225             },
226         },
227         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
228         {
229             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
230             'info_dict': {
231                 'id': 'hds',
232                 'ext': 'flv',
233                 'title': 'hds',
234                 'formats': 'mincount:1',
235             },
236             'params': {
237                 'skip_download': True,
238             },
239         },
240         # SMIL from https://www.restudy.dk/video/play/id/1637
241         {
242             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
243             'info_dict': {
244                 'id': 'video_1637',
245                 'ext': 'flv',
246                 'title': 'video_1637',
247                 'formats': 'mincount:3',
248             },
249             'params': {
250                 'skip_download': True,
251             },
252         },
253         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
254         {
255             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
256             'info_dict': {
257                 'id': 'smil-service',
258                 'ext': 'flv',
259                 'title': 'smil-service',
260                 'formats': 'mincount:1',
261             },
262             'params': {
263                 'skip_download': True,
264             },
265         },
266         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
267         {
268             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
269             'info_dict': {
270                 'id': '4719370',
271                 'ext': 'mp4',
272                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
273                 'formats': 'mincount:3',
274             },
275             'params': {
276                 'skip_download': True,
277             },
278         },
279         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
280         {
281             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
282             'info_dict': {
283                 'id': 'mZlp2ctYIUEB',
284                 'ext': 'mp4',
285                 'title': 'Tikibad ontruimd wegens brand',
286                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
287                 'thumbnail': r're:^https?://.*\.jpg$',
288                 'duration': 33,
289             },
290             'params': {
291                 'skip_download': True,
292             },
293         },
294         # MPD from http://dash-mse-test.appspot.com/media.html
295         {
296             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
297             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
298             'info_dict': {
299                 'id': 'car-20120827-manifest',
300                 'ext': 'mp4',
301                 'title': 'car-20120827-manifest',
302                 'formats': 'mincount:9',
303                 'upload_date': '20130904',
304             },
305             'params': {
306                 'format': 'bestvideo',
307             },
308         },
309         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
310         {
311             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
312             'info_dict': {
313                 'id': 'content',
314                 'ext': 'mp4',
315                 'title': 'content',
316                 'formats': 'mincount:8',
317             },
318             'params': {
319                 # m3u8 downloads
320                 'skip_download': True,
321             },
322             'skip': 'video gone',
323         },
324         # m3u8 served with Content-Type: text/plain
325         {
326             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
327             'info_dict': {
328                 'id': 'index',
329                 'ext': 'mp4',
330                 'title': 'index',
331                 'upload_date': '20140720',
332                 'formats': 'mincount:11',
333             },
334             'params': {
335                 # m3u8 downloads
336                 'skip_download': True,
337             },
338             'skip': 'video gone',
339         },
340         # google redirect
341         {
342             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
343             'info_dict': {
344                 'id': 'cmQHVoWB5FY',
345                 'ext': 'mp4',
346                 'upload_date': '20130224',
347                 'uploader_id': 'TheVerge',
348                 'description': r're:^Chris Ziegler takes a look at the\.*',
349                 'uploader': 'The Verge',
350                 'title': 'First Firefox OS phones side-by-side',
351             },
352             'params': {
353                 'skip_download': False,
354             }
355         },
356         {
357             # redirect in Refresh HTTP header
358             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
359             'info_dict': {
360                 'id': 'pO8h3EaFRdo',
361                 'ext': 'mp4',
362                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
363                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
364                 'upload_date': '20150917',
365                 'uploader_id': 'brtvofficial',
366                 'uploader': 'Boiler Room',
367             },
368             'params': {
369                 'skip_download': False,
370             },
371         },
372         {
373             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
374             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
375             'info_dict': {
376                 'id': '13601338388002',
377                 'ext': 'mp4',
378                 'uploader': 'www.hodiho.fr',
379                 'title': 'R\u00e9gis plante sa Jeep',
380             }
381         },
382         # bandcamp page with custom domain
383         {
384             'add_ie': ['Bandcamp'],
385             'url': 'http://bronyrock.com/track/the-pony-mash',
386             'info_dict': {
387                 'id': '3235767654',
388                 'ext': 'mp3',
389                 'title': 'The Pony Mash',
390                 'uploader': 'M_Pallante',
391             },
392             'skip': 'There is a limit of 200 free downloads / month for the test song',
393         },
394         {
395             # embedded brightcove video
396             # it also tests Brightcove videos that require the 'Referer'
397             # header to be set in the HTTP requests
398             'add_ie': ['BrightcoveLegacy'],
399             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
400             'info_dict': {
401                 'id': '2765128793001',
402                 'ext': 'mp4',
403                 'title': 'Le cours de bourse : l’analyse technique',
404                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
405                 'uploader': 'BFM BUSINESS',
406             },
407             'params': {
408                 'skip_download': True,
409             },
410         },
411         {
412             # embedded with itemprop embedURL and video id spelled as `idVideo`
413             'add_id': ['BrightcoveLegacy'],
414             'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
415             'info_dict': {
416                 'id': '5255628253001',
417                 'ext': 'mp4',
418                 'title': 'md5:37c519b1128915607601e75a87995fc0',
419                 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
420                 'uploader': 'BFM BUSINESS',
421                 'uploader_id': '876450612001',
422                 'timestamp': 1482255315,
423                 'upload_date': '20161220',
424             },
425             'params': {
426                 'skip_download': True,
427             },
428         },
429         {
430             # https://github.com/rg3/youtube-dl/issues/2253
431             'url': 'http://bcove.me/i6nfkrc3',
432             'md5': '0ba9446db037002366bab3b3eb30c88c',
433             'info_dict': {
434                 'id': '3101154703001',
435                 'ext': 'mp4',
436                 'title': 'Still no power',
437                 'uploader': 'thestar.com',
438                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
439             },
440             'add_ie': ['BrightcoveLegacy'],
441             'skip': 'video gone',
442         },
443         {
444             'url': 'http://www.championat.com/video/football/v/87/87499.html',
445             'md5': 'fb973ecf6e4a78a67453647444222983',
446             'info_dict': {
447                 'id': '3414141473001',
448                 'ext': 'mp4',
449                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
450                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
451                 'uploader': 'Championat',
452             },
453         },
454         {
455             # https://github.com/rg3/youtube-dl/issues/3541
456             'add_ie': ['BrightcoveLegacy'],
457             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
458             'info_dict': {
459                 'id': '3866516442001',
460                 'ext': 'mp4',
461                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
462                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
463                 'uploader': 'SBS Broadcasting',
464             },
465             'skip': 'Restricted to Netherlands',
466             'params': {
467                 'skip_download': True,  # m3u8 download
468             },
469         },
470         {
471             # Brightcove video in <iframe>
472             'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
473             'md5': '36d74ef5e37c8b4a2ce92880d208b968',
474             'info_dict': {
475                 'id': '5360463607001',
476                 'ext': 'mp4',
477                 'title': '叙利亚失明儿童在废墟上演唱《心跳》  呼吁获得正常童年生活',
478                 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
479                 'uploader': 'United Nations',
480                 'uploader_id': '1362235914001',
481                 'timestamp': 1489593889,
482                 'upload_date': '20170315',
483             },
484             'add_ie': ['BrightcoveLegacy'],
485         },
486         {
487             # Brightcove with alternative playerID key
488             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
489             'info_dict': {
490                 'id': 'nmeth.2062_SV1',
491                 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
492             },
493             'playlist': [{
494                 'info_dict': {
495                     'id': '2228375078001',
496                     'ext': 'mp4',
497                     'title': 'nmeth.2062-sv1',
498                     'description': 'nmeth.2062-sv1',
499                     'timestamp': 1363357591,
500                     'upload_date': '20130315',
501                     'uploader': 'Nature Publishing Group',
502                     'uploader_id': '1964492299001',
503                 },
504             }],
505         },
506         {
507             # Brightcove with UUID in videoPlayer
508             'url': 'http://www8.hp.com/cn/zh/home.html',
509             'info_dict': {
510                 'id': '5255815316001',
511                 'ext': 'mp4',
512                 'title': 'Sprocket Video - China',
513                 'description': 'Sprocket Video - China',
514                 'uploader': 'HP-Video Gallery',
515                 'timestamp': 1482263210,
516                 'upload_date': '20161220',
517                 'uploader_id': '1107601872001',
518             },
519             'params': {
520                 'skip_download': True,  # m3u8 download
521             },
522             'skip': 'video rotates...weekly?',
523         },
524         {
525             # Brightcove:new type [2].
526             'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
527             'md5': '2b35148fcf48da41c9fb4591650784f3',
528             'info_dict': {
529                 'id': '5348741021001',
530                 'ext': 'mp4',
531                 'upload_date': '20170306',
532                 'uploader_id': '4191638492001',
533                 'timestamp': 1488769918,
534                 'title': 'VIDEO:  St. Thomas More earns first trip to basketball semis',
535
536             },
537         },
538         {
539             # Alternative brightcove <video> attributes
540             'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
541             'info_dict': {
542                 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
543                 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
544             },
545             'playlist': [{
546                 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
547                 'info_dict': {
548                     'id': '5311302538001',
549                     'ext': 'mp4',
550                     'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
551                     'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
552                     'timestamp': 1486321708,
553                     'upload_date': '20170205',
554                     'uploader_id': '800000640001',
555                 },
556                 'only_matching': True,
557             }],
558         },
559         {
560             # Brightcove with UUID in videoPlayer
561             'url': 'http://www8.hp.com/cn/zh/home.html',
562             'info_dict': {
563                 'id': '5255815316001',
564                 'ext': 'mp4',
565                 'title': 'Sprocket Video - China',
566                 'description': 'Sprocket Video - China',
567                 'uploader': 'HP-Video Gallery',
568                 'timestamp': 1482263210,
569                 'upload_date': '20161220',
570                 'uploader_id': '1107601872001',
571             },
572             'params': {
573                 'skip_download': True,  # m3u8 download
574             },
575         },
576         # ooyala video
577         {
578             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
579             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
580             'info_dict': {
581                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
582                 'ext': 'mp4',
583                 'title': '2cc213299525360.mov',  # that's what we get
584                 'duration': 238.231,
585             },
586             'add_ie': ['Ooyala'],
587         },
588         {
589             # ooyala video embedded with http://player.ooyala.com/iframe.js
590             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
591             'info_dict': {
592                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
593                 'ext': 'mp4',
594                 'title': '"Steve Jobs: Man in the Machine" trailer',
595                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
596                 'duration': 135.427,
597             },
598             'params': {
599                 'skip_download': True,
600             },
601             'skip': 'movie expired',
602         },
603         # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
604         {
605             'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
606             'info_dict': {
607                 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
608                 'ext': 'mp4',
609                 'title': 'Steampunk Fest Comes to Honesdale',
610                 'duration': 43.276,
611             },
612             'params': {
613                 'skip_download': True,
614             }
615         },
616         # embed.ly video
617         {
618             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
619             'info_dict': {
620                 'id': '9ODmcdjQcHQ',
621                 'ext': 'mp4',
622                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
623                 'upload_date': '20140225',
624                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
625                 'uploader': 'Tested',
626                 'uploader_id': 'testedcom',
627             },
628             # No need to test YoutubeIE here
629             'params': {
630                 'skip_download': True,
631             },
632         },
633         # funnyordie embed
634         {
635             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
636             'info_dict': {
637                 'id': '18e820ec3f',
638                 'ext': 'mp4',
639                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
640                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
641             },
642             # HEAD requests lead to endless 301, while GET is OK
643             'expected_warnings': ['301'],
644         },
645         # RUTV embed
646         {
647             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
648             'info_dict': {
649                 'id': '776940',
650                 'ext': 'mp4',
651                 'title': 'Охотское море стало целиком российским',
652                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
653             },
654             'params': {
655                 # m3u8 download
656                 'skip_download': True,
657             },
658         },
659         # TVC embed
660         {
661             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
662             'info_dict': {
663                 'id': '55304',
664                 'ext': 'mp4',
665                 'title': 'Дошкольное воспитание',
666             },
667         },
668         # SportBox embed
669         {
670             'url': 'http://www.vestifinance.ru/articles/25753',
671             'info_dict': {
672                 'id': '25753',
673                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
674             },
675             'playlist': [{
676                 'info_dict': {
677                     'id': '370908',
678                     'title': 'Госзаказ. День 3',
679                     'ext': 'mp4',
680                 }
681             }, {
682                 'info_dict': {
683                     'id': '370905',
684                     'title': 'Госзаказ. День 2',
685                     'ext': 'mp4',
686                 }
687             }, {
688                 'info_dict': {
689                     'id': '370902',
690                     'title': 'Госзаказ. День 1',
691                     'ext': 'mp4',
692                 }
693             }],
694             'params': {
695                 # m3u8 download
696                 'skip_download': True,
697             },
698         },
699         # Myvi.ru embed
700         {
701             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
702             'info_dict': {
703                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
704                 'ext': 'mp4',
705                 'title': 'Ужастики, русский трейлер (2015)',
706                 'thumbnail': r're:^https?://.*\.jpg$',
707                 'duration': 153,
708             }
709         },
710         # XHamster embed
711         {
712             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
713             'info_dict': {
714                 'id': 'showthread',
715                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
716             },
717             'playlist_mincount': 7,
718             # This forum no longer allows <iframe> markup;
719             # HTML tags are now displayed as-is
720             'skip': 'No videos on this page',
721         },
722         # Embedded TED video
723         {
724             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
725             'md5': '65fdff94098e4a607385a60c5177c638',
726             'info_dict': {
727                 'id': '1969',
728                 'ext': 'mp4',
729                 'title': 'Hidden miracles of the natural world',
730                 'uploader': 'Louie Schwartzberg',
731                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
732             }
733         },
734         # nowvideo embed hidden behind percent encoding
735         {
736             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
737             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
738             'info_dict': {
739                 'id': '06e53103ca9aa',
740                 'ext': 'flv',
741                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
742                 'description': 'No description',
743             },
744         },
745         # arte embed
746         {
747             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
748             'md5': '7653032cbb25bf6c80d80f217055fa43',
749             'info_dict': {
750                 'id': '048195-004_PLUS7-F',
751                 'ext': 'flv',
752                 'title': 'X:enius',
753                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
754                 'upload_date': '20140320',
755             },
756             'params': {
757                 'skip_download': 'Requires rtmpdump'
758             },
759             'skip': 'video gone',
760         },
761         # francetv embed
762         {
763             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
764             'info_dict': {
765                 'id': 'EV_30231',
766                 'ext': 'mp4',
767                 'title': 'Alcaline, le concert avec Calogero',
768                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
769                 'upload_date': '20150226',
770                 'timestamp': 1424989860,
771                 'duration': 5400,
772             },
773             'params': {
774                 # m3u8 downloads
775                 'skip_download': True,
776             },
777             'expected_warnings': [
778                 'Forbidden'
779             ]
780         },
781         # Condé Nast embed
782         {
783             'url': 'http://www.wired.com/2014/04/honda-asimo/',
784             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
785             'info_dict': {
786                 'id': '53501be369702d3275860000',
787                 'ext': 'mp4',
788                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
789             }
790         },
791         # Dailymotion embed
792         {
793             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
794             'md5': '441aeeb82eb72c422c7f14ec533999cd',
795             'info_dict': {
796                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
797                 'ext': 'mp4',
798                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
799                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
800                 'uploader': 'Spi0n',
801                 'uploader_id': 'xgditw',
802                 'upload_date': '20140425',
803                 'timestamp': 1398441542,
804             },
805             'add_ie': ['Dailymotion'],
806         },
807         # DailyMail embed
808         {
809             'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
810             'info_dict': {
811                 'id': '1495629',
812                 'ext': 'mp4',
813                 'title': 'Care worker punches elderly dementia patient in head 11 times',
814                 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
815             },
816             'add_ie': ['DailyMail'],
817             'params': {
818                 'skip_download': True,
819             },
820         },
821         # YouTube embed
822         {
823             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
824             'info_dict': {
825                 'id': 'FXRb4ykk4S0',
826                 'ext': 'mp4',
827                 'title': 'The NBL Auction 2014',
828                 'uploader': 'BADMINTON England',
829                 'uploader_id': 'BADMINTONEvents',
830                 'upload_date': '20140603',
831                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
832             },
833             'add_ie': ['Youtube'],
834             'params': {
835                 'skip_download': True,
836             }
837         },
838         # MTVServices embed
839         {
840             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
841             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
842             'info_dict': {
843                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
844                 'ext': 'mp4',
845                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
846                 'description': 'Two valets share their love for movie star Liam Neesons.',
847                 'timestamp': 1349922600,
848                 'upload_date': '20121011',
849             },
850         },
851         # YouTube embed via <data-embed-url="">
852         {
853             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
854             'info_dict': {
855                 'id': '4vAffPZIT44',
856                 'ext': 'mp4',
857                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
858                 'uploader': 'Gameloft',
859                 'uploader_id': 'gameloft',
860                 'upload_date': '20140828',
861                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
862             },
863             'params': {
864                 'skip_download': True,
865             }
866         },
867         # YouTube <object> embed
868         {
869             'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
870             'md5': '516718101ec834f74318df76259fb3cc',
871             'info_dict': {
872                 'id': 'msN87y-iEx0',
873                 'ext': 'webm',
874                 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
875                 'upload_date': '20080526',
876                 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
877                 'uploader': 'Christopher Sykes',
878                 'uploader_id': 'ChristopherJSykes',
879             },
880             'add_ie': ['Youtube'],
881         },
882         # Camtasia studio
883         {
884             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
885             'playlist': [{
886                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
887                 'info_dict': {
888                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
889                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
890                     'ext': 'flv',
891                     'duration': 2235.90,
892                 }
893             }, {
894                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
895                 'info_dict': {
896                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
897                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
898                     'ext': 'flv',
899                     'duration': 2235.93,
900                 }
901             }],
902             'info_dict': {
903                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
904             }
905         },
906         # Flowplayer
907         {
908             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
909             'md5': '9d65602bf31c6e20014319c7d07fba27',
910             'info_dict': {
911                 'id': '5123ea6d5e5a7',
912                 'ext': 'mp4',
913                 'age_limit': 18,
914                 'uploader': 'www.handjobhub.com',
915                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
916             }
917         },
918         # Multiple brightcove videos
919         # https://github.com/rg3/youtube-dl/issues/2283
920         {
921             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
922             'info_dict': {
923                 'id': 'always-never',
924                 'title': 'Always / Never - The New Yorker',
925             },
926             'playlist_count': 3,
927             'params': {
928                 'extract_flat': False,
929                 'skip_download': True,
930             }
931         },
932         # MLB embed
933         {
934             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
935             'md5': '96f09a37e44da40dd083e12d9a683327',
936             'info_dict': {
937                 'id': '33322633',
938                 'ext': 'mp4',
939                 'title': 'Ump changes call to ball',
940                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
941                 'duration': 48,
942                 'timestamp': 1401537900,
943                 'upload_date': '20140531',
944                 'thumbnail': r're:^https?://.*\.jpg$',
945             },
946         },
947         # Wistia embed
948         {
949             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
950             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
951             'info_dict': {
952                 'id': '6e2wtrbdaf',
953                 'ext': 'mov',
954                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
955                 'description': 'a Paywall Videos video from Remilon',
956                 'duration': 644.072,
957                 'uploader': 'study.com',
958                 'timestamp': 1459678540,
959                 'upload_date': '20160403',
960                 'filesize': 24687186,
961             },
962         },
963         {
964             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
965             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
966             'info_dict': {
967                 'id': 'uxjb0lwrcz',
968                 'ext': 'mp4',
969                 'title': 'Conversation about Hexagonal Rails Part 1',
970                 'description': 'a Martin Fowler video from ThoughtWorks',
971                 'duration': 1715.0,
972                 'uploader': 'thoughtworks.wistia.com',
973                 'timestamp': 1401832161,
974                 'upload_date': '20140603',
975             },
976         },
977         # Wistia standard embed (async)
978         {
979             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
980             'info_dict': {
981                 'id': '807fafadvk',
982                 'ext': 'mp4',
983                 'title': 'Drip Brennan Dunn Workshop',
984                 'description': 'a JV Webinars video from getdrip-1',
985                 'duration': 4986.95,
986                 'timestamp': 1463607249,
987                 'upload_date': '20160518',
988             },
989             'params': {
990                 'skip_download': True,
991             }
992         },
993         # Soundcloud embed
994         {
995             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
996             'info_dict': {
997                 'id': '174391317',
998                 'ext': 'mp3',
999                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
1000                 'uploader': 'Sophos Security',
1001                 'title': 'Chet Chat 171 - Oct 29, 2014',
1002                 'upload_date': '20141029',
1003             }
1004         },
1005         # Soundcloud multiple embeds
1006         {
1007             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
1008             'info_dict': {
1009                 'id': '52809',
1010                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
1011             },
1012             'playlist_mincount': 7,
1013         },
1014         # TuneIn station embed
1015         {
1016             'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
1017             'info_dict': {
1018                 'id': '204146',
1019                 'ext': 'mp3',
1020                 'title': 'CNRV',
1021                 'location': 'Paris, France',
1022                 'is_live': True,
1023             },
1024             'params': {
1025                 # Live stream
1026                 'skip_download': True,
1027             },
1028         },
1029         # Livestream embed
1030         {
1031             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
1032             'info_dict': {
1033                 'id': '67864563',
1034                 'ext': 'flv',
1035                 'upload_date': '20141112',
1036                 'title': 'Rosetta #CometLanding webcast HL 10',
1037             }
1038         },
1039         # Another Livestream embed, without 'new.' in URL
1040         {
1041             'url': 'https://www.freespeech.org/',
1042             'info_dict': {
1043                 'id': '123537347',
1044                 'ext': 'mp4',
1045                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
1046             },
1047             'params': {
1048                 # Live stream
1049                 'skip_download': True,
1050             },
1051         },
1052         # LazyYT
1053         {
1054             'url': 'https://skiplagged.com/',
1055             'info_dict': {
1056                 'id': 'skiplagged',
1057                 'title': 'Skiplagged: The smart way to find cheap flights',
1058             },
1059             'playlist_mincount': 1,
1060             'add_ie': ['Youtube'],
1061         },
1062         # Cinchcast embed
1063         {
1064             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
1065             'info_dict': {
1066                 'id': '7141703',
1067                 'ext': 'mp3',
1068                 'upload_date': '20141126',
1069                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
1070             }
1071         },
1072         # Cinerama player
1073         {
1074             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
1075             'info_dict': {
1076                 'id': '730m_DandD_1901_512k',
1077                 'ext': 'mp4',
1078                 'uploader': 'www.abc.net.au',
1079                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
1080             }
1081         },
1082         # embedded viddler video
1083         {
1084             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
1085             'info_dict': {
1086                 'id': '4d03aad9',
1087                 'ext': 'mp4',
1088                 'uploader': 'deadspin',
1089                 'title': 'WALL-TO-GORTAT',
1090                 'timestamp': 1422285291,
1091                 'upload_date': '20150126',
1092             },
1093             'add_ie': ['Viddler'],
1094         },
1095         # Libsyn embed
1096         {
1097             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
1098             'info_dict': {
1099                 'id': '3377616',
1100                 'ext': 'mp3',
1101                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
1102                 'description': 'md5:601cb790edd05908957dae8aaa866465',
1103                 'upload_date': '20150220',
1104             },
1105             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
1106         },
1107         # jwplayer YouTube
1108         {
1109             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
1110             'info_dict': {
1111                 'id': 'Mrj4DVp2zeA',
1112                 'ext': 'mp4',
1113                 'upload_date': '20150212',
1114                 'uploader': 'The National Archives UK',
1115                 'description': 'md5:8078af856dca76edc42910b61273dbbf',
1116                 'uploader_id': 'NationalArchives08',
1117                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
1118             },
1119         },
1120         # jwplayer rtmp
1121         {
1122             'url': 'http://www.suffolk.edu/sjc/live.php',
1123             'info_dict': {
1124                 'id': 'live',
1125                 'ext': 'flv',
1126                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
1127                 'uploader': 'www.suffolk.edu',
1128             },
1129             'params': {
1130                 'skip_download': True,
1131             },
1132             'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
1133         },
1134         # Complex jwplayer
1135         {
1136             'url': 'http://www.indiedb.com/games/king-machine/videos',
1137             'info_dict': {
1138                 'id': 'videos',
1139                 'ext': 'mp4',
1140                 'title': 'king machine trailer 1',
1141                 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
1142                 'thumbnail': r're:^https?://.*\.jpg$',
1143             },
1144         },
1145         {
1146             # JWPlayer config passed as variable
1147             'url': 'http://www.txxx.com/videos/3326530/ariele/',
1148             'info_dict': {
1149                 'id': '3326530_hq',
1150                 'ext': 'mp4',
1151                 'title': 'ARIELE | Tube Cup',
1152                 'uploader': 'www.txxx.com',
1153                 'age_limit': 18,
1154             },
1155             'params': {
1156                 'skip_download': True,
1157             }
1158         },
1159         {
1160             # JWPlatform iframe
1161             'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
1162             'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
1163             'info_dict': {
1164                 'id': 'O0c5JcKT',
1165                 'ext': 'mp4',
1166                 'upload_date': '20171122',
1167                 'timestamp': 1511366290,
1168                 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
1169             },
1170             'add_ie': [JWPlatformIE.ie_key()],
1171         },
1172         {
1173             # Video.js embed, multiple formats
1174             'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
1175             'info_dict': {
1176                 'id': 'yygqldloqIk',
1177                 'ext': 'mp4',
1178                 'title': 'SolidWorks. Урок 6 Настройка чертежа',
1179                 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
1180                 'upload_date': '20130314',
1181                 'uploader': 'PROстое3D',
1182                 'uploader_id': 'PROstoe3D',
1183             },
1184             'params': {
1185                 'skip_download': True,
1186             },
1187         },
1188         {
1189             # Video.js embed, single format
1190             'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
1191             'info_dict': {
1192                 'id': 'watch',
1193                 'ext': 'mp4',
1194                 'title': 'Step 1 -  Good Foundation',
1195                 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
1196             },
1197             'params': {
1198                 'skip_download': True,
1199             },
1200         },
1201         # rtl.nl embed
1202         {
1203             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1204             'playlist_mincount': 5,
1205             'info_dict': {
1206                 'id': 'aanslagen-kopenhagen',
1207                 'title': 'Aanslagen Kopenhagen',
1208             }
1209         },
1210         # Zapiks embed
1211         {
1212             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1213             'info_dict': {
1214                 'id': '118046',
1215                 'ext': 'mp4',
1216                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1217             }
1218         },
1219         # Kaltura embed (different embed code)
1220         {
1221             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1222             'info_dict': {
1223                 'id': '1_a52wc67y',
1224                 'ext': 'flv',
1225                 'upload_date': '20150127',
1226                 'uploader_id': 'PremierMedia',
1227                 'timestamp': int,
1228                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1229             },
1230         },
1231         # Kaltura embed with single quotes
1232         {
1233             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1234             'info_dict': {
1235                 'id': '0_izeg5utt',
1236                 'ext': 'mp4',
1237                 'title': '35871',
1238                 'timestamp': 1355743100,
1239                 'upload_date': '20121217',
1240                 'uploader_id': 'cplapp@learn360.com',
1241             },
1242             'add_ie': ['Kaltura'],
1243         },
1244         {
1245             # Kaltura embedded via quoted entry_id
1246             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1247             'info_dict': {
1248                 'id': '0_utuok90b',
1249                 'ext': 'mp4',
1250                 'title': '06_matthew_brender_raj_dutt',
1251                 'timestamp': 1466638791,
1252                 'upload_date': '20160622',
1253             },
1254             'add_ie': ['Kaltura'],
1255             'expected_warnings': [
1256                 'Could not send HEAD request'
1257             ],
1258             'params': {
1259                 'skip_download': True,
1260             }
1261         },
1262         {
1263             # Kaltura embedded, some fileExt broken (#11480)
1264             'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1265             'info_dict': {
1266                 'id': '1_sgtvehim',
1267                 'ext': 'mp4',
1268                 'title': 'Our "Standard Models" of particle physics and cosmology',
1269                 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1270                 'timestamp': 1321158993,
1271                 'upload_date': '20111113',
1272                 'uploader_id': 'kps1',
1273             },
1274             'add_ie': ['Kaltura'],
1275         },
1276         {
1277             # Kaltura iframe embed
1278             'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1279             'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1280             'info_dict': {
1281                 'id': '0_f2cfbpwy',
1282                 'ext': 'mp4',
1283                 'title': 'I. M. Pei: A Centennial Celebration',
1284                 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1285                 'upload_date': '20170403',
1286                 'uploader_id': 'batchUser',
1287                 'timestamp': 1491232186,
1288             },
1289             'add_ie': ['Kaltura'],
1290         },
1291         {
1292             # Kaltura iframe embed, more sophisticated
1293             'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
1294             'info_dict': {
1295                 'id': '1_9gzouybz',
1296                 'ext': 'mp4',
1297                 'title': 'lecture-05sep2017',
1298                 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
1299                 'upload_date': '20170913',
1300                 'uploader_id': 'eps2',
1301                 'timestamp': 1505340777,
1302             },
1303             'params': {
1304                 'skip_download': True,
1305             },
1306             'add_ie': ['Kaltura'],
1307         },
1308         {
1309             # meta twitter:player
1310             'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
1311             'info_dict': {
1312                 'id': '0_01b42zps',
1313                 'ext': 'mp4',
1314                 'title': 'Main Twerk (Video)',
1315                 'upload_date': '20171208',
1316                 'uploader_id': 'sebastian.salinas@thechive.com',
1317                 'timestamp': 1512713057,
1318             },
1319             'params': {
1320                 'skip_download': True,
1321             },
1322             'add_ie': ['Kaltura'],
1323         },
1324         # referrer protected EaglePlatform embed
1325         {
1326             'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
1327             'info_dict': {
1328                 'id': '582306',
1329                 'ext': 'mp4',
1330                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1331                 'thumbnail': r're:^https?://.*\.jpg$',
1332                 'duration': 3382,
1333                 'view_count': int,
1334             },
1335             'params': {
1336                 'skip_download': True,
1337             },
1338         },
1339         # ClipYou (EaglePlatform) embed (custom URL)
1340         {
1341             'url': 'http://muz-tv.ru/play/7129/',
1342             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1343             'info_dict': {
1344                 'id': '12820',
1345                 'ext': 'mp4',
1346                 'title': "'O Sole Mio",
1347                 'thumbnail': r're:^https?://.*\.jpg$',
1348                 'duration': 216,
1349                 'view_count': int,
1350             },
1351             'params': {
1352                 'skip_download': True,
1353             },
1354             'skip': 'This video is unavailable.',
1355         },
1356         # Pladform embed
1357         {
1358             'url': 'http://muz-tv.ru/kinozal/view/7400/',
1359             'info_dict': {
1360                 'id': '100183293',
1361                 'ext': 'mp4',
1362                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1363                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1364                 'thumbnail': r're:^https?://.*\.jpg$',
1365                 'duration': 694,
1366                 'age_limit': 0,
1367             },
1368             'skip': 'HTTP Error 404: Not Found',
1369         },
1370         # Playwire embed
1371         {
1372             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1373             'info_dict': {
1374                 'id': '3519514',
1375                 'ext': 'mp4',
1376                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1377                 'thumbnail': r're:^https?://.*\.png$',
1378                 'duration': 45.115,
1379             },
1380         },
1381         # 5min embed
1382         {
1383             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1384             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1385             'info_dict': {
1386                 'id': '518726732',
1387                 'ext': 'mp4',
1388                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1389                 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
1390                 'timestamp': 1427237531,
1391                 'uploader': 'Crunch Report',
1392                 'upload_date': '20150324',
1393             },
1394             'params': {
1395                 # m3u8 download
1396                 'skip_download': True,
1397             },
1398         },
1399         # Crooks and Liars embed
1400         {
1401             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1402             'info_dict': {
1403                 'id': '8RUoRhRi',
1404                 'ext': 'mp4',
1405                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1406                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1407                 'timestamp': 1428207000,
1408                 'upload_date': '20150405',
1409                 'uploader': 'Heather',
1410             },
1411         },
1412         # Crooks and Liars external embed
1413         {
1414             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1415             'info_dict': {
1416                 'id': 'MTE3MjUtMzQ2MzA',
1417                 'ext': 'mp4',
1418                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1419                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1420                 'timestamp': 1265032391,
1421                 'upload_date': '20100201',
1422                 'uploader': 'Heather',
1423             },
1424         },
1425         # NBC Sports vplayer embed
1426         {
1427             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1428             'info_dict': {
1429                 'id': 'ln7x1qSThw4k',
1430                 'ext': 'flv',
1431                 'title': "PFT Live: New leader in the 'new-look' defense",
1432                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1433                 'uploader': 'NBCU-SPORTS',
1434                 'upload_date': '20140107',
1435                 'timestamp': 1389118457,
1436             },
1437             'skip': 'Invalid Page URL',
1438         },
1439         # NBC News embed
1440         {
1441             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1442             'md5': '1aa589c675898ae6d37a17913cf68d66',
1443             'info_dict': {
1444                 'id': 'x_dtl_oa_LettermanliftPR_160608',
1445                 'ext': 'mp4',
1446                 'title': 'David Letterman: A Preview',
1447                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1448                 'upload_date': '20160609',
1449                 'timestamp': 1465431544,
1450                 'uploader': 'NBCU-NEWS',
1451             },
1452         },
1453         # UDN embed
1454         {
1455             'url': 'https://video.udn.com/news/300346',
1456             'md5': 'fd2060e988c326991037b9aff9df21a6',
1457             'info_dict': {
1458                 'id': '300346',
1459                 'ext': 'mp4',
1460                 'title': '中一中男師變性 全校師生力挺',
1461                 'thumbnail': r're:^https?://.*\.jpg$',
1462             },
1463             'params': {
1464                 # m3u8 download
1465                 'skip_download': True,
1466             },
1467             'expected_warnings': ['Failed to parse JSON Expecting value'],
1468         },
1469         # Brightcove URL in single quotes
1470         {
1471             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1472             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1473             'info_dict': {
1474                 'id': '4255764656001',
1475                 'ext': 'mp4',
1476                 'title': 'SN Presents: Russell Martin, World Citizen',
1477                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1478                 'uploader': 'Rogers Sportsnet',
1479                 'uploader_id': '1704050871',
1480                 'upload_date': '20150525',
1481                 'timestamp': 1432570283,
1482             },
1483         },
1484         # OnionStudios embed
1485         {
1486             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1487             'info_dict': {
1488                 'id': '2855',
1489                 'ext': 'mp4',
1490                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1491                 'thumbnail': r're:^https?://.*\.jpe?g$',
1492                 'uploader': 'ClickHole',
1493                 'uploader_id': 'clickhole',
1494             }
1495         },
1496         # SnagFilms embed
1497         {
1498             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1499             'info_dict': {
1500                 'id': '74849a00-85a9-11e1-9660-123139220831',
1501                 'ext': 'mp4',
1502                 'title': '#whilewewatch',
1503             }
1504         },
1505         # AdobeTVVideo embed
1506         {
1507             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1508             'md5': '43662b577c018ad707a63766462b1e87',
1509             'info_dict': {
1510                 'id': '2456',
1511                 'ext': 'mp4',
1512                 'title': 'New experience with Acrobat DC',
1513                 'description': 'New experience with Acrobat DC',
1514                 'duration': 248.667,
1515             },
1516         },
1517         # BrightcoveInPageEmbed embed
1518         {
1519             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1520             'info_dict': {
1521                 'id': '4238694884001',
1522                 'ext': 'flv',
1523                 'title': 'Tabletop: Dread, Last Thoughts',
1524                 'description': 'Tabletop: Dread, Last Thoughts',
1525                 'duration': 51690,
1526             },
1527         },
1528         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1529         # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
1530         {
1531             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1532             'info_dict': {
1533                 'id': '4785848093001',
1534                 'ext': 'mp4',
1535                 'title': 'The Cardinal Pell Interview',
1536                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1537                 'uploader': 'GlobeCast Australia - GlobeStream',
1538                 'uploader_id': '2733773828001',
1539                 'upload_date': '20160304',
1540                 'timestamp': 1457083087,
1541             },
1542             'params': {
1543                 # m3u8 downloads
1544                 'skip_download': True,
1545             },
1546         },
1547         {
1548             # Brightcove embed with whitespace around attribute names
1549             'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
1550             'info_dict': {
1551                 'id': '3167554373001',
1552                 'ext': 'mp4',
1553                 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
1554                 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
1555                 'uploader_id': '1079349493',
1556                 'upload_date': '20140207',
1557                 'timestamp': 1391810548,
1558             },
1559             'params': {
1560                 'skip_download': True,
1561             },
1562         },
1563         # Another form of arte.tv embed
1564         {
1565             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1566             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1567             'info_dict': {
1568                 'id': '030273-562_PLUS7-F',
1569                 'ext': 'mp4',
1570                 'title': 'ARTE Reportage - Nulle part, en France',
1571                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1572                 'upload_date': '20160409',
1573             },
1574         },
1575         # LiveLeak embed
1576         {
1577             'url': 'http://www.wykop.pl/link/3088787/',
1578             'md5': '7619da8c820e835bef21a1efa2a0fc71',
1579             'info_dict': {
1580                 'id': '874_1459135191',
1581                 'ext': 'mp4',
1582                 'title': 'Man shows poor quality of new apartment building',
1583                 'description': 'The wall is like a sand pile.',
1584                 'uploader': 'Lake8737',
1585             },
1586             'add_ie': [LiveLeakIE.ie_key()],
1587         },
1588         # Another LiveLeak embed pattern (#13336)
1589         {
1590             'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
1591             'info_dict': {
1592                 'id': '2eb_1496309988',
1593                 'ext': 'mp4',
1594                 'title': 'Thief robs place where everyone was armed',
1595                 'description': 'md5:694d73ee79e535953cf2488562288eee',
1596                 'uploader': 'brazilwtf',
1597             },
1598             'add_ie': [LiveLeakIE.ie_key()],
1599         },
1600         # Duplicated embedded video URLs
1601         {
1602             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1603             'info_dict': {
1604                 'id': '149298443_480_16c25b74_2',
1605                 'ext': 'mp4',
1606                 'title': 'vs. Blue Orange Spring Game',
1607                 'uploader': 'www.hudl.com',
1608             },
1609         },
1610         # twitter:player:stream embed
1611         {
1612             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1613             'info_dict': {
1614                 'id': 'master',
1615                 'ext': 'mp4',
1616                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1617                 'uploader': 'www.rtl.be',
1618             },
1619             'params': {
1620                 # m3u8 downloads
1621                 'skip_download': True,
1622             },
1623         },
1624         # twitter:player embed
1625         {
1626             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1627             'md5': 'a3e0df96369831de324f0778e126653c',
1628             'info_dict': {
1629                 'id': '4909620399001',
1630                 'ext': 'mp4',
1631                 'title': 'What Do Black Holes Sound Like?',
1632                 'description': 'what do black holes sound like',
1633                 'upload_date': '20160524',
1634                 'uploader_id': '29913724001',
1635                 'timestamp': 1464107587,
1636                 'uploader': 'TheAtlantic',
1637             },
1638             'add_ie': ['BrightcoveLegacy'],
1639         },
1640         # Facebook <iframe> embed
1641         {
1642             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1643             'md5': 'fbcde74f534176ecb015849146dd3aee',
1644             'info_dict': {
1645                 'id': '599637780109885',
1646                 'ext': 'mp4',
1647                 'title': 'Facebook video #599637780109885',
1648             },
1649         },
1650         # Facebook <iframe> embed, plugin video
1651         {
1652             'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
1653             'info_dict': {
1654                 'id': '1754168231264132',
1655                 'ext': 'mp4',
1656                 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
1657                 'uploader': 'Tariq Ramadan (official)',
1658                 'timestamp': 1496758379,
1659                 'upload_date': '20170606',
1660             },
1661             'params': {
1662                 'skip_download': True,
1663             },
1664         },
1665         # Facebook API embed
1666         {
1667             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1668             'md5': 'a47372ee61b39a7b90287094d447d94e',
1669             'info_dict': {
1670                 'id': '10153467542406923',
1671                 'ext': 'mp4',
1672                 'title': 'Facebook video #10153467542406923',
1673             },
1674         },
1675         # Wordpress "YouTube Video Importer" plugin
1676         {
1677             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1678             'md5': 'd16797741b560b485194eddda8121b48',
1679             'info_dict': {
1680                 'id': 'HNTXWDXV9Is',
1681                 'ext': 'mp4',
1682                 'title': 'Blue Devils Drumline Stanford lot 2016',
1683                 'upload_date': '20160627',
1684                 'uploader_id': 'GENOCIDE8GENERAL10',
1685                 'uploader': 'cylus cyrus',
1686             },
1687         },
1688         {
1689             # video stored on custom kaltura server
1690             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1691             'md5': '537617d06e64dfed891fa1593c4b30cc',
1692             'info_dict': {
1693                 'id': '0_1iotm5bh',
1694                 'ext': 'mp4',
1695                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1696                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1697                 'uploader_id': 'videos.expansion@el-mundo.net',
1698                 'upload_date': '20150429',
1699                 'timestamp': 1430303472,
1700             },
1701             'add_ie': ['Kaltura'],
1702         },
1703         {
1704             # Non-standard Vimeo embed
1705             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1706             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1707             'info_dict': {
1708                 'id': '148867247',
1709                 'ext': 'mp4',
1710                 'title': 'Understanding the web - Teaser',
1711                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1712                 'upload_date': '20151214',
1713                 'uploader': 'OpenClassrooms',
1714                 'uploader_id': 'openclassrooms',
1715             },
1716             'add_ie': ['Vimeo'],
1717         },
1718         {
1719             # generic vimeo embed that requires original URL passed as Referer
1720             'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1721             'only_matching': True,
1722         },
1723         {
1724             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1725             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1726             'info_dict': {
1727                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1728                 'ext': 'mp4',
1729                 'title': 'Big Buck Bunny',
1730                 'description': 'Royalty free test video',
1731                 'timestamp': 1432816365,
1732                 'upload_date': '20150528',
1733                 'is_live': False,
1734             },
1735             'params': {
1736                 'skip_download': True,
1737             },
1738             'add_ie': [ArkenaIE.ie_key()],
1739         },
1740         {
1741             'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1742             'info_dict': {
1743                 'id': '1c7141f46c',
1744                 'ext': 'mp4',
1745                 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1746             },
1747             'params': {
1748                 'skip_download': True,
1749             },
1750             'add_ie': [Vbox7IE.ie_key()],
1751         },
1752         {
1753             # DBTV embeds
1754             'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1755             'info_dict': {
1756                 'id': '43254897',
1757                 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1758             },
1759             'playlist_mincount': 3,
1760         },
1761         {
1762             # Videa embeds
1763             'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1764             'info_dict': {
1765                 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1766                 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1767             },
1768             'playlist_mincount': 2,
1769         },
1770         {
1771             # 20 minuten embed
1772             'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1773             'info_dict': {
1774                 'id': '523629',
1775                 'ext': 'mp4',
1776                 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1777                 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1778             },
1779             'params': {
1780                 'skip_download': True,
1781             },
1782             'add_ie': [TwentyMinutenIE.ie_key()],
1783         },
1784         {
1785             # VideoPress embed
1786             'url': 'https://en.support.wordpress.com/videopress/',
1787             'info_dict': {
1788                 'id': 'OcobLTqC',
1789                 'ext': 'm4v',
1790                 'title': 'IMG_5786',
1791                 'timestamp': 1435711927,
1792                 'upload_date': '20150701',
1793             },
1794             'params': {
1795                 'skip_download': True,
1796             },
1797             'add_ie': [VideoPressIE.ie_key()],
1798         },
1799         {
1800             # Rutube embed
1801             'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1802             'info_dict': {
1803                 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1804                 'ext': 'flv',
1805                 'title': 'Магаззино: Казань 2',
1806                 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1807                 'uploader': 'Магаззино',
1808                 'upload_date': '20170228',
1809                 'uploader_id': '996642',
1810             },
1811             'params': {
1812                 'skip_download': True,
1813             },
1814             'add_ie': [RutubeIE.ie_key()],
1815         },
1816         {
1817             # ThePlatform embedded with whitespaces in URLs
1818             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1819             'only_matching': True,
1820         },
1821         {
1822             # Senate ISVP iframe https
1823             'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1824             'md5': 'fb8c70b0b515e5037981a2492099aab8',
1825             'info_dict': {
1826                 'id': 'govtaff020316',
1827                 'ext': 'mp4',
1828                 'title': 'Integrated Senate Video Player',
1829             },
1830             'add_ie': [SenateISVPIE.ie_key()],
1831         },
1832         {
1833             # Limelight embeds (1 channel embed + 4 media embeds)
1834             'url': 'http://www.sedona.com/FacilitatorTraining2017',
1835             'info_dict': {
1836                 'id': 'FacilitatorTraining2017',
1837                 'title': 'Facilitator Training 2017',
1838             },
1839             'playlist_mincount': 5,
1840         },
1841         {
1842             # Limelight embed (LimelightPlayerUtil.embed)
1843             'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
1844             'info_dict': {
1845                 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
1846                 'ext': 'mp4',
1847                 'title': '07448641',
1848                 'timestamp': 1499890639,
1849                 'upload_date': '20170712',
1850             },
1851             'params': {
1852                 'skip_download': True,
1853             },
1854             'add_ie': ['LimelightMedia'],
1855         },
1856         {
1857             'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
1858             'info_dict': {
1859                 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
1860                 'title': 'Standoff with Walnut Creek murder suspect ends',
1861                 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
1862             },
1863             'playlist_mincount': 4,
1864         },
1865         {
1866             # WashingtonPost embed
1867             'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
1868             'info_dict': {
1869                 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
1870                 'ext': 'mp4',
1871                 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
1872                 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
1873                 'timestamp': 1455216756,
1874                 'uploader': 'The Washington Post',
1875                 'upload_date': '20160211',
1876             },
1877             'add_ie': [WashingtonPostIE.ie_key()],
1878         },
1879         {
1880             # Mediaset embed
1881             'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
1882             'info_dict': {
1883                 'id': '720642',
1884                 'ext': 'mp4',
1885                 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
1886             },
1887             'params': {
1888                 'skip_download': True,
1889             },
1890             'add_ie': [MediasetIE.ie_key()],
1891         },
1892         {
1893             # JOJ.sk embeds
1894             'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1895             'info_dict': {
1896                 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
1897                 'title': 'Slovenskom sa prehnala vlna silných búrok',
1898             },
1899             'playlist_mincount': 5,
1900             'add_ie': [JojIE.ie_key()],
1901         },
1902         {
1903             # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
1904             'url': 'https://tvrain.ru/amp/418921/',
1905             'md5': 'cc00413936695987e8de148b67d14f1d',
1906             'info_dict': {
1907                 'id': '418921',
1908                 'ext': 'mp4',
1909                 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
1910             },
1911         },
1912         {
1913             # vzaar embed
1914             'url': 'http://help.vzaar.com/article/165-embedding-video',
1915             'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
1916             'info_dict': {
1917                 'id': '8707641',
1918                 'ext': 'mp4',
1919                 'title': 'Building A Business Online: Principal Chairs Q & A',
1920             },
1921         },
1922         {
1923             # multiple HTML5 videos on one page
1924             'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
1925             'info_dict': {
1926                 'id': 'keyscenarios',
1927                 'title': 'Rescue Kit 14 Free Edition - Getting started',
1928             },
1929             'playlist_count': 4,
1930         },
1931         {
1932             # vshare embed
1933             'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
1934             'md5': '17b39f55b5497ae8b59f5fbce8e35886',
1935             'info_dict': {
1936                 'id': '0f64ce6',
1937                 'title': 'vl14062007715967',
1938                 'ext': 'mp4',
1939             }
1940         },
1941         {
1942             'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
1943             'md5': 'aecd089f55b1cb5a59032cb049d3a356',
1944             'info_dict': {
1945                 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
1946                 'ext': 'mp4',
1947                 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
1948                 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
1949                 'timestamp': 1474354800,
1950                 'upload_date': '20160920',
1951             }
1952         },
1953         {
1954             'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
1955             'info_dict': {
1956                 'id': '1731611',
1957                 'ext': 'mp4',
1958                 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
1959                 'description': 'md5:eb5f23826a027ba95277d105f248b825',
1960                 'timestamp': 1516100691,
1961                 'upload_date': '20180116',
1962             },
1963             'params': {
1964                 'skip_download': True,
1965             },
1966             'add_ie': [SpringboardPlatformIE.ie_key()],
1967         },
1968         {
1969             'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
1970             'info_dict': {
1971                 'id': 'uPDB5I9wfp8',
1972                 'ext': 'webm',
1973                 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
1974                 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
1975                 'upload_date': '20160219',
1976                 'uploader': 'Pocoyo - Português (BR)',
1977                 'uploader_id': 'PocoyoBrazil',
1978             },
1979             'add_ie': [YoutubeIE.ie_key()],
1980             'params': {
1981                 'skip_download': True,
1982             },
1983         },
1984         {
1985             'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
1986             'info_dict': {
1987                 'id': 'vMDE4NzI1Mjgt690b',
1988                 'ext': 'mp4',
1989                 'title': 'Котята',
1990             },
1991             'add_ie': [YapFilesIE.ie_key()],
1992             'params': {
1993                 'skip_download': True,
1994             },
1995         },
1996         {
1997             # CloudflareStream embed
1998             'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
1999             'info_dict': {
2000                 'id': '31c9291ab41fac05471db4e73aa11717',
2001                 'ext': 'mp4',
2002                 'title': '31c9291ab41fac05471db4e73aa11717',
2003             },
2004             'add_ie': [CloudflareStreamIE.ie_key()],
2005             'params': {
2006                 'skip_download': True,
2007             },
2008         },
2009         {
2010             # PeerTube embed
2011             'url': 'https://joinpeertube.org/fr/home/',
2012             'info_dict': {
2013                 'id': 'home',
2014                 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
2015             },
2016             'playlist_count': 2,
2017         },
2018         {
2019             # Indavideo embed
2020             'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
2021             'info_dict': {
2022                 'id': '1693903',
2023                 'ext': 'mp4',
2024                 'title': 'Így kell otthon hamburgert sütni',
2025                 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
2026                 'timestamp': 1426330212,
2027                 'upload_date': '20150314',
2028                 'uploader': 'StreetKitchen',
2029                 'uploader_id': '546363',
2030             },
2031             'add_ie': [IndavideoEmbedIE.ie_key()],
2032             'params': {
2033                 'skip_download': True,
2034             },
2035         },
2036         {
2037             # APA embed via JWPlatform embed
2038             'url': 'http://www.vol.at/blue-man-group/5593454',
2039             'info_dict': {
2040                 'id': 'jjv85FdZ',
2041                 'ext': 'mp4',
2042                 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
2043                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2044                 'thumbnail': r're:^https?://.*\.jpg$',
2045                 'duration': 254,
2046                 'timestamp': 1519211149,
2047                 'upload_date': '20180221',
2048             },
2049             'params': {
2050                 'skip_download': True,
2051             },
2052         },
2053         {
2054             'url': 'http://share-videos.se/auto/video/83645793?uid=13',
2055             'md5': 'b68d276de422ab07ee1d49388103f457',
2056             'info_dict': {
2057                 'id': '83645793',
2058                 'title': 'Lock up and get excited',
2059                 'ext': 'mp4'
2060             },
2061             'skip': 'TODO: fix nested playlists processing in tests',
2062         },
2063         # {
2064         #     # TODO: find another test
2065         #     # http://schema.org/VideoObject
2066         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
2067         #     'md5': '888dcf08b7ea671381f00fab74692755',
2068         #     'info_dict': {
2069         #         'id': 'nyvTSJMKId',
2070         #         'ext': 'mp4',
2071         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
2072         #         'description': '#love for cats.',
2073         #         'timestamp': 1461244995,
2074         #         'upload_date': '20160421',
2075         #     },
2076         #     'params': {
2077         #         'force_generic_extractor': True,
2078         #     },
2079         # }
2080     ]
2081
2082     def report_following_redirect(self, new_url):
2083         """Report information extraction."""
2084         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
2085
2086     def _extract_rss(self, url, video_id, doc):
2087         playlist_title = doc.find('./channel/title').text
2088         playlist_desc_el = doc.find('./channel/description')
2089         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
2090
2091         entries = []
2092         for it in doc.findall('./channel/item'):
2093             next_url = None
2094             enclosure_nodes = it.findall('./enclosure')
2095             for e in enclosure_nodes:
2096                 next_url = e.attrib.get('url')
2097                 if next_url:
2098                     break
2099
2100             if not next_url:
2101                 next_url = xpath_text(it, 'link', fatal=False)
2102
2103             if not next_url:
2104                 continue
2105
2106             entries.append({
2107                 '_type': 'url_transparent',
2108                 'url': next_url,
2109                 'title': it.find('title').text,
2110             })
2111
2112         return {
2113             '_type': 'playlist',
2114             'id': url,
2115             'title': playlist_title,
2116             'description': playlist_desc,
2117             'entries': entries,
2118         }
2119
2120     def _extract_camtasia(self, url, video_id, webpage):
2121         """ Returns None if no camtasia video can be found. """
2122
2123         camtasia_cfg = self._search_regex(
2124             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
2125             webpage, 'camtasia configuration file', default=None)
2126         if camtasia_cfg is None:
2127             return None
2128
2129         title = self._html_search_meta('DC.title', webpage, fatal=True)
2130
2131         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
2132         camtasia_cfg = self._download_xml(
2133             camtasia_url, video_id,
2134             note='Downloading camtasia configuration',
2135             errnote='Failed to download camtasia configuration')
2136         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
2137
2138         entries = []
2139         for n in fileset_node.getchildren():
2140             url_n = n.find('./uri')
2141             if url_n is None:
2142                 continue
2143
2144             entries.append({
2145                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
2146                 'title': '%s - %s' % (title, n.tag),
2147                 'url': compat_urlparse.urljoin(url, url_n.text),
2148                 'duration': float_or_none(n.find('./duration').text),
2149             })
2150
2151         return {
2152             '_type': 'playlist',
2153             'entries': entries,
2154             'title': title,
2155         }
2156
2157     def _real_extract(self, url):
2158         if url.startswith('//'):
2159             return {
2160                 '_type': 'url',
2161                 'url': self.http_scheme() + url,
2162             }
2163
2164         parsed_url = compat_urlparse.urlparse(url)
2165         if not parsed_url.scheme:
2166             default_search = self._downloader.params.get('default_search')
2167             if default_search is None:
2168                 default_search = 'fixup_error'
2169
2170             if default_search in ('auto', 'auto_warning', 'fixup_error'):
2171                 if '/' in url:
2172                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
2173                     return self.url_result('http://' + url)
2174                 elif default_search != 'fixup_error':
2175                     if default_search == 'auto_warning':
2176                         if re.match(r'^(?:url|URL)$', url):
2177                             raise ExtractorError(
2178                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
2179                                 expected=True)
2180                         else:
2181                             self._downloader.report_warning(
2182                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
2183                     return self.url_result('ytsearch:' + url)
2184
2185             if default_search in ('error', 'fixup_error'):
2186                 raise ExtractorError(
2187                     '%r is not a valid URL. '
2188                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
2189                     % (url, url), expected=True)
2190             else:
2191                 if ':' not in default_search:
2192                     default_search += ':'
2193                 return self.url_result(default_search + url)
2194
2195         url, smuggled_data = unsmuggle_url(url)
2196         force_videoid = None
2197         is_intentional = smuggled_data and smuggled_data.get('to_generic')
2198         if smuggled_data and 'force_videoid' in smuggled_data:
2199             force_videoid = smuggled_data['force_videoid']
2200             video_id = force_videoid
2201         else:
2202             video_id = self._generic_id(url)
2203
2204         self.to_screen('%s: Requesting header' % video_id)
2205
2206         head_req = HEADRequest(url)
2207         head_response = self._request_webpage(
2208             head_req, video_id,
2209             note=False, errnote='Could not send HEAD request to %s' % url,
2210             fatal=False)
2211
2212         if head_response is not False:
2213             # Check for redirect
2214             new_url = compat_str(head_response.geturl())
2215             if url != new_url:
2216                 self.report_following_redirect(new_url)
2217                 if force_videoid:
2218                     new_url = smuggle_url(
2219                         new_url, {'force_videoid': force_videoid})
2220                 return self.url_result(new_url)
2221
2222         full_response = None
2223         if head_response is False:
2224             request = sanitized_Request(url)
2225             request.add_header('Accept-Encoding', '*')
2226             full_response = self._request_webpage(request, video_id)
2227             head_response = full_response
2228
2229         info_dict = {
2230             'id': video_id,
2231             'title': self._generic_title(url),
2232             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
2233         }
2234
2235         # Check for direct link to a video
2236         content_type = head_response.headers.get('Content-Type', '').lower()
2237         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
2238         if m:
2239             format_id = compat_str(m.group('format_id'))
2240             if format_id.endswith('mpegurl'):
2241                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
2242             elif format_id == 'f4m':
2243                 formats = self._extract_f4m_formats(url, video_id)
2244             else:
2245                 formats = [{
2246                     'format_id': format_id,
2247                     'url': url,
2248                     'vcodec': 'none' if m.group('type') == 'audio' else None
2249                 }]
2250                 info_dict['direct'] = True
2251             self._sort_formats(formats)
2252             info_dict['formats'] = formats
2253             return info_dict
2254
2255         if not self._downloader.params.get('test', False) and not is_intentional:
2256             force = self._downloader.params.get('force_generic_extractor', False)
2257             self._downloader.report_warning(
2258                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
2259
2260         if not full_response:
2261             request = sanitized_Request(url)
2262             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
2263             # making it impossible to download only a chunk of the file (yet we only need the
2264             # first 512 bytes to test whether it's HTML or not). With youtube-dl's default
2265             # Accept-Encoding this would always result in downloading the whole file, which is
2266             # not desirable. Therefore, for the extraction pass we have to override Accept-Encoding
2267             # to accept any encoding, so that we get raw bytes and can download only a chunk.
2268             # It would probably be better to solve this by checking Content-Type for application/octet-stream
2269             # after the HEAD request finishes, but it is not clear whether we can rely on this.
2270             request.add_header('Accept-Encoding', '*')
2271             full_response = self._request_webpage(request, video_id)
2272
2273         first_bytes = full_response.read(512)
2274
2275         # Is it an M3U playlist?
2276         if first_bytes.startswith(b'#EXTM3U'):
2277             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
2278             self._sort_formats(info_dict['formats'])
2279             return info_dict
2280
2281         # Maybe it's a direct link to a video?
2282         # Be careful not to download the whole thing!
2283         if not is_html(first_bytes):
2284             self._downloader.report_warning(
2285                 'URL could be a direct video link, returning it as such.')
2286             info_dict.update({
2287                 'direct': True,
2288                 'url': url,
2289             })
2290             return info_dict
2291
2292         webpage = self._webpage_read_content(
2293             full_response, url, video_id, prefix=first_bytes)
2294
2295         self.report_extraction(video_id)
2296
2297         # Is it an RSS feed, a Smooth Streaming (ISM) manifest, a SMIL file, an XSPF playlist, an MPD manifest or an F4M manifest?
2298         try:
2299             doc = compat_etree_fromstring(webpage.encode('utf-8'))
2300             if doc.tag == 'rss':
2301                 return self._extract_rss(url, video_id, doc)
2302             elif doc.tag == 'SmoothStreamingMedia':
2303                 info_dict['formats'] = self._parse_ism_formats(doc, url)
2304                 self._sort_formats(info_dict['formats'])
2305                 return info_dict
2306             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
2307                 smil = self._parse_smil(doc, url, video_id)
2308                 self._sort_formats(smil['formats'])
2309                 return smil
2310             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
2311                 return self.playlist_result(
2312                     self._parse_xspf(
2313                         doc, video_id, xspf_url=url,
2314                         xspf_base_url=compat_str(full_response.geturl())),
2315                     video_id)
2316             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
2317                 info_dict['formats'] = self._parse_mpd_formats(
2318                     doc,
2319                     mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
2320                     mpd_url=url)
2321                 self._sort_formats(info_dict['formats'])
2322                 return info_dict
2323             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
2324                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
2325                 self._sort_formats(info_dict['formats'])
2326                 return info_dict
2327         except compat_xml_parse_error:
2328             pass
2329
2330         # Is it a Camtasia project?
2331         camtasia_res = self._extract_camtasia(url, video_id, webpage)
2332         if camtasia_res is not None:
2333             return camtasia_res
2334
2335         # Sometimes embedded video player is hidden behind percent encoding
2336         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
2337         # Unescaping the whole page allows handling those cases in a generic way
2338         webpage = compat_urllib_parse_unquote(webpage)
2339
2340         # it's tempting to parse this further, but you would
2341         # have to take into account all the variations like
2342         #   Video Title - Site Name
2343         #   Site Name | Video Title
2344         #   Video Title - Tagline | Site Name
2345         # and so on and so forth; it's just not practical
2346         video_title = self._og_search_title(
2347             webpage, default=None) or self._html_search_regex(
2348             r'(?s)<title>(.*?)</title>', webpage, 'video title',
2349             default='video')
2350
2351         # Try to detect age limit automatically
2352         age_limit = self._rta_search(webpage)
2353         # And then there are the jokers who advertise that they use RTA,
2354         # but actually don't.
2355         AGE_LIMIT_MARKERS = [
2356             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
2357         ]
2358         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
2359             age_limit = 18
2360
2361         # video uploader is domain name
2362         video_uploader = self._search_regex(
2363             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
2364
2365         video_description = self._og_search_description(webpage, default=None)
2366         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
2367
2368         info_dict.update({
2369             'title': video_title,
2370             'description': video_description,
2371             'thumbnail': video_thumbnail,
2372             'age_limit': age_limit,
2373         })
2374
2375         # Look for Brightcove Legacy Studio embeds
2376         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
2377         if bc_urls:
2378             entries = [{
2379                 '_type': 'url',
2380                 'url': smuggle_url(bc_url, {'Referer': url}),
2381                 'ie_key': 'BrightcoveLegacy'
2382             } for bc_url in bc_urls]
2383
2384             return {
2385                 '_type': 'playlist',
2386                 'title': video_title,
2387                 'id': video_id,
2388                 'entries': entries,
2389             }
2390
2391         # Look for Brightcove New Studio embeds
2392         bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
2393         if bc_urls:
2394             return self.playlist_from_matches(
2395                 bc_urls, video_id, video_title,
2396                 getter=lambda x: smuggle_url(x, {'referrer': url}),
2397                 ie='BrightcoveNew')
2398
2399         # Look for Nexx embeds
2400         nexx_urls = NexxIE._extract_urls(webpage)
2401         if nexx_urls:
2402             return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
2403
2404         # Look for Nexx iFrame embeds
2405         nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
2406         if nexx_embed_urls:
2407             return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
2408
2409         # Look for ThePlatform embeds
2410         tp_urls = ThePlatformIE._extract_urls(webpage)
2411         if tp_urls:
2412             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
2413
2414         # Look for Vessel embeds
2415         vessel_urls = VesselIE._extract_urls(webpage)
2416         if vessel_urls:
2417             return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
2418
2419         # Look for embedded rtl.nl player
2420         matches = re.findall(
2421             r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
2422             webpage)
2423         if matches:
2424             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
2425
2426         vimeo_urls = VimeoIE._extract_urls(url, webpage)
2427         if vimeo_urls:
2428             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
2429
2430         vid_me_embed_url = self._search_regex(
2431             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
2432             webpage, 'vid.me embed', default=None)
2433         if vid_me_embed_url is not None:
2434             return self.url_result(vid_me_embed_url, 'Vidme')
2435
2436         # Look for YouTube embeds
2437         youtube_urls = YoutubeIE._extract_urls(webpage)
2438         if youtube_urls:
2439             return self.playlist_from_matches(
2440                 youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
2441
2442         matches = DailymotionIE._extract_urls(webpage)
2443         if matches:
2444             return self.playlist_from_matches(matches, video_id, video_title)
2445
2446         # Look for embedded Dailymotion playlist player (#3822)
2447         m = re.search(
2448             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
2449         if m:
2450             playlists = re.findall(
2451                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
2452             if playlists:
2453                 return self.playlist_from_matches(
2454                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
2455
2456         # Look for DailyMail embeds
2457         dailymail_urls = DailyMailIE._extract_urls(webpage)
2458         if dailymail_urls:
2459             return self.playlist_from_matches(
2460                 dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
2461
2462         # Look for embedded Wistia player
2463         wistia_url = WistiaIE._extract_url(webpage)
2464         if wistia_url:
2465             return {
2466                 '_type': 'url_transparent',
2467                 'url': self._proto_relative_url(wistia_url),
2468                 'ie_key': WistiaIE.ie_key(),
2469                 'uploader': video_uploader,
2470             }
2471
2472         # Look for SVT player
2473         svt_url = SVTIE._extract_url(webpage)
2474         if svt_url:
2475             return self.url_result(svt_url, 'SVT')
2476
2477         # Look for Bandcamp pages with custom domain
2478         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2479         if mobj is not None:
2480             burl = unescapeHTML(mobj.group(1))
2481             # Don't set the extractor because it can be a track url or an album
2482             return self.url_result(burl)
2483
2484         # Look for embedded Vevo player
2485         mobj = re.search(
2486             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2487         if mobj is not None:
2488             return self.url_result(mobj.group('url'))
2489
2490         # Look for embedded Viddler player
2491         mobj = re.search(
2492             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2493             webpage)
2494         if mobj is not None:
2495             return self.url_result(mobj.group('url'))
2496
2497         # Look for NYTimes player
2498         mobj = re.search(
2499             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2500             webpage)
2501         if mobj is not None:
2502             return self.url_result(mobj.group('url'))
2503
2504         # Look for Libsyn player
2505         mobj = re.search(
2506             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2507         if mobj is not None:
2508             return self.url_result(mobj.group('url'))
2509
2510         # Look for Ooyala videos
2511         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
2512                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2513                 re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2514                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
2515                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
2516         if mobj is not None:
2517             embed_token = self._search_regex(
2518                 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2519                 webpage, 'ooyala embed token', default=None)
2520             return OoyalaIE._build_url_result(smuggle_url(
2521                 mobj.group('ec'), {
2522                     'domain': url,
2523                     'embed_token': embed_token,
2524                 }))
2525
2526         # Look for multiple Ooyala embeds on SBN network websites
2527         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2528         if mobj is not None:
2529             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2530             if embeds:
2531                 return self.playlist_from_matches(
2532                     embeds, video_id, video_title,
2533                     getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2534
2535         # Look for Aparat videos
2536         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2537         if mobj is not None:
2538             return self.url_result(mobj.group(1), 'Aparat')
2539
2540         # Look for MPORA videos
2541         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2542         if mobj is not None:
2543             return self.url_result(mobj.group(1), 'Mpora')
2544
2545         # Look for embedded NovaMov-based player
2546         mobj = re.search(
2547             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2548                     (?P<url>http://(?:(?:embed|www)\.)?
2549                         (?:novamov\.com|
2550                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
2551                            videoweed\.(?:es|com)|
2552                            movshare\.(?:net|sx|ag)|
2553                            divxstage\.(?:eu|net|ch|co|at|ag))
2554                         /embed\.php.+?)\1''', webpage)
2555         if mobj is not None:
2556             return self.url_result(mobj.group('url'))
2557
2558         # Look for embedded Facebook player
2559         facebook_urls = FacebookIE._extract_urls(webpage)
2560         if facebook_urls:
2561             return self.playlist_from_matches(facebook_urls, video_id, video_title)
2562
2563         # Look for embedded VK player
2564         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2565         if mobj is not None:
2566             return self.url_result(mobj.group('url'), 'VK')
2567
2568         # Look for embedded Odnoklassniki player
2569         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2570         if mobj is not None:
2571             return self.url_result(mobj.group('url'), 'Odnoklassniki')
2572
2573         # Look for embedded ivi player
2574         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2575         if mobj is not None:
2576             return self.url_result(mobj.group('url'), 'Ivi')
2577
2578         # Look for embedded Huffington Post player
2579         mobj = re.search(
2580             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2581         if mobj is not None:
2582             return self.url_result(mobj.group('url'), 'HuffPost')
2583
2584         # Look for embed.ly
2585         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2586         if mobj is not None:
2587             return self.url_result(mobj.group('url'))
2588         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2589         if mobj is not None:
2590             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2591
2592         # Look for funnyordie embed
2593         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2594         if matches:
2595             return self.playlist_from_matches(
2596                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
2597
2598         # Look for BBC iPlayer embed
2599         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2600         if matches:
2601             return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
2602
2603         # Look for embedded RUTV player
2604         rutv_url = RUTVIE._extract_url(webpage)
2605         if rutv_url:
2606             return self.url_result(rutv_url, 'RUTV')
2607
2608         # Look for embedded TVC player
2609         tvc_url = TVCIE._extract_url(webpage)
2610         if tvc_url:
2611             return self.url_result(tvc_url, 'TVC')
2612
2613         # Look for embedded SportBox player
2614         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2615         if sportbox_urls:
2616             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
2617
2618         # Look for embedded XHamster player
2619         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2620         if xhamster_urls:
2621             return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2622
2623         # Look for embedded TNAFlixNetwork player
2624         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2625         if tnaflix_urls:
2626             return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2627
2628         # Look for embedded PornHub player
2629         pornhub_urls = PornHubIE._extract_urls(webpage)
2630         if pornhub_urls:
2631             return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
2632
2633         # Look for embedded DrTuber player
2634         drtuber_urls = DrTuberIE._extract_urls(webpage)
2635         if drtuber_urls:
2636             return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
2637
2638         # Look for embedded RedTube player
2639         redtube_urls = RedTubeIE._extract_urls(webpage)
2640         if redtube_urls:
2641             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
2642
2643         # Look for embedded Tube8 player
2644         tube8_urls = Tube8IE._extract_urls(webpage)
2645         if tube8_urls:
2646             return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
2647
2648         # Look for embedded Tvigle player
2649         mobj = re.search(
2650             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2651         if mobj is not None:
2652             return self.url_result(mobj.group('url'), 'Tvigle')
2653
2654         # Look for embedded TED player
2655         mobj = re.search(
2656             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2657         if mobj is not None:
2658             return self.url_result(mobj.group('url'), 'TED')
2659
2660         # Look for embedded Ustream videos
2661         ustream_url = UstreamIE._extract_url(webpage)
2662         if ustream_url:
2663             return self.url_result(ustream_url, UstreamIE.ie_key())
2664
2665         # Look for embedded arte.tv player
2666         mobj = re.search(
2667             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2668             webpage)
2669         if mobj is not None:
2670             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2671
2672         # Look for embedded francetv player
2673         mobj = re.search(
2674             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2675             webpage)
2676         if mobj is not None:
2677             return self.url_result(mobj.group('url'))
2678
2679         # Look for embedded smotri.com player
2680         smotri_url = SmotriIE._extract_url(webpage)
2681         if smotri_url:
2682             return self.url_result(smotri_url, 'Smotri')
2683
2684         # Look for embedded Myvi.ru player
2685         myvi_url = MyviIE._extract_url(webpage)
2686         if myvi_url:
2687             return self.url_result(myvi_url)
2688
2689         # Look for embedded soundcloud player
2690         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2691         if soundcloud_urls:
2692             return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2693
2694         # Look for tunein player
2695         tunein_urls = TuneInBaseIE._extract_urls(webpage)
2696         if tunein_urls:
2697             return self.playlist_from_matches(tunein_urls, video_id, video_title)
2698
2699         # Look for embedded mtvservices player
2700         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2701         if mtvservices_url:
2702             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2703
2704         # Look for embedded yahoo player
2705         mobj = re.search(
2706             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2707             webpage)
2708         if mobj is not None:
2709             return self.url_result(mobj.group('url'), 'Yahoo')
2710
2711         # Look for embedded sbs.com.au player
2712         mobj = re.search(
2713             r'''(?x)
2714             (?:
2715                 <meta\s+property="og:video"\s+content=|
2716                 <iframe[^>]+?src=
2717             )
2718             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2719             webpage)
2720         if mobj is not None:
2721             return self.url_result(mobj.group('url'), 'SBS')
2722
2723         # Look for embedded Cinchcast player
2724         mobj = re.search(
2725             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2726             webpage)
2727         if mobj is not None:
2728             return self.url_result(mobj.group('url'), 'Cinchcast')
2729
2730         mobj = re.search(
2731             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2732             webpage)
2733         if not mobj:
2734             mobj = re.search(
2735                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2736                 webpage)
2737         if mobj is not None:
2738             return self.url_result(mobj.group('url'), 'MLB')
2739
2740         mobj = re.search(
2741             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2742             webpage)
2743         if mobj is not None:
2744             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2745
2746         mobj = re.search(
2747             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2748             webpage)
2749         if mobj is not None:
2750             return self.url_result(mobj.group('url'), 'Livestream')
2751
2752         # Look for Zapiks embed
2753         mobj = re.search(
2754             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2755         if mobj is not None:
2756             return self.url_result(mobj.group('url'), 'Zapiks')
2757
2758         # Look for Kaltura embeds
2759         kaltura_url = KalturaIE._extract_url(webpage)
2760         if kaltura_url:
2761             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2762
2763         # Look for EaglePlatform embeds
2764         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2765         if eagleplatform_url:
2766             return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
2767
2768         # Look for ClipYou (uses EaglePlatform) embeds
2769         mobj = re.search(
2770             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2771         if mobj is not None:
2772             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2773
2774         # Look for Pladform embeds
2775         pladform_url = PladformIE._extract_url(webpage)
2776         if pladform_url:
2777             return self.url_result(pladform_url)
2778
2779         # Look for Videomore embeds
2780         videomore_url = VideomoreIE._extract_url(webpage)
2781         if videomore_url:
2782             return self.url_result(videomore_url)
2783
2784         # Look for Webcaster embeds
2785         webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2786         if webcaster_url:
2787             return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2788
2789         # Look for Playwire embeds
2790         mobj = re.search(
2791             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2792         if mobj is not None:
2793             return self.url_result(mobj.group('url'))
2794
2795         # Look for 5min embeds
2796         mobj = re.search(
2797             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2798         if mobj is not None:
2799             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2800
2801         # Look for Crooks and Liars embeds
2802         mobj = re.search(
2803             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2804         if mobj is not None:
2805             return self.url_result(mobj.group('url'))
2806
2807         # Look for NBC Sports VPlayer embeds
2808         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2809         if nbc_sports_url:
2810             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2811
2812         # Look for NBC News embeds
2813         nbc_news_embed_url = re.search(
2814             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2815         if nbc_news_embed_url:
2816             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2817
2818         # Look for Google Drive embeds
2819         google_drive_url = GoogleDriveIE._extract_url(webpage)
2820         if google_drive_url:
2821             return self.url_result(google_drive_url, 'GoogleDrive')
2822
2823         # Look for UDN embeds
2824         mobj = re.search(
2825             r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2826         if mobj is not None:
2827             return self.url_result(
2828                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2829
2830         # Look for Senate ISVP iframe
2831         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2832         if senate_isvp_url:
2833             return self.url_result(senate_isvp_url, 'SenateISVP')
2834
2835         # Look for OnionStudios embeds
2836         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2837         if onionstudios_url:
2838             return self.url_result(onionstudios_url)
2839
2840         # Look for ViewLift embeds
2841         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2842         if viewlift_url:
2843             return self.url_result(viewlift_url)
2844
2845         # Look for JWPlatform embeds
2846         jwplatform_urls = JWPlatformIE._extract_urls(webpage)
2847         if jwplatform_urls:
2848             return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
2849
2850         # Look for Digiteka embeds
2851         digiteka_url = DigitekaIE._extract_url(webpage)
2852         if digiteka_url:
2853             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2854
2855         # Look for Arkena embeds
2856         arkena_url = ArkenaIE._extract_url(webpage)
2857         if arkena_url:
2858             return self.url_result(arkena_url, ArkenaIE.ie_key())
2859
2860         # Look for Piksel embeds
2861         piksel_url = PikselIE._extract_url(webpage)
2862         if piksel_url:
2863             return self.url_result(piksel_url, PikselIE.ie_key())
2864
2865         # Look for Limelight embeds
2866         limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
2867         if limelight_urls:
2868             return self.playlist_result(
2869                 limelight_urls, video_id, video_title, video_description)
2870
2871         # Look for Anvato embeds
2872         anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
2873         if anvato_urls:
2874             return self.playlist_result(
2875                 anvato_urls, video_id, video_title, video_description)
2876
2877         # Look for AdobeTVVideo embeds
2878         mobj = re.search(
2879             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2880             webpage)
2881         if mobj is not None:
2882             return self.url_result(
2883                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2884                 'AdobeTVVideo')
2885
2886         # Look for Vine embeds
2887         mobj = re.search(
2888             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2889             webpage)
2890         if mobj is not None:
2891             return self.url_result(
2892                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2893
2894         # Look for VODPlatform embeds
2895         mobj = re.search(
2896             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2897             webpage)
2898         if mobj is not None:
2899             return self.url_result(
2900                 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2901
2902         # Look for Mangomolo embeds
2903         mobj = re.search(
2904             r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2905                 (?:
2906                     video\?.*?\bid=(?P<video_id>\d+)|
2907                     index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2908                 ).+?)\1''', webpage)
2909         if mobj is not None:
2910             info = {
2911                 '_type': 'url_transparent',
2912                 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2913                 'title': video_title,
2914                 'description': video_description,
2915                 'thumbnail': video_thumbnail,
2916                 'uploader': video_uploader,
2917             }
2918             video_id = mobj.group('video_id')
2919             if video_id:
2920                 info.update({
2921                     'ie_key': 'MangomoloVideo',
2922                     'id': video_id,
2923                 })
2924             else:
2925                 info.update({
2926                     'ie_key': 'MangomoloLive',
2927                     'id': mobj.group('channel_id'),
2928                 })
2929             return info
2930
2931         # Look for Instagram embeds
2932         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2933         if instagram_embed_url is not None:
2934             return self.url_result(
2935                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2936
2937         # Look for LiveLeak embeds
2938         liveleak_urls = LiveLeakIE._extract_urls(webpage)
2939         if liveleak_urls:
2940             return self.playlist_from_matches(liveleak_urls, video_id, video_title)
2941
2942         # Look for 3Q SDN embeds
2943         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2944         if threeqsdn_url:
2945             return {
2946                 '_type': 'url_transparent',
2947                 'ie_key': ThreeQSDNIE.ie_key(),
2948                 'url': self._proto_relative_url(threeqsdn_url),
2949                 'title': video_title,
2950                 'description': video_description,
2951                 'thumbnail': video_thumbnail,
2952                 'uploader': video_uploader,
2953             }
2954
2955         # Look for VBOX7 embeds
2956         vbox7_url = Vbox7IE._extract_url(webpage)
2957         if vbox7_url:
2958             return self.url_result(vbox7_url, Vbox7IE.ie_key())
2959
2960         # Look for DBTV embeds
2961         dbtv_urls = DBTVIE._extract_urls(webpage)
2962         if dbtv_urls:
2963             return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
2964
2965         # Look for Videa embeds
2966         videa_urls = VideaIE._extract_urls(webpage)
2967         if videa_urls:
2968             return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
2969
2970         # Look for 20 minuten embeds
2971         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2972         if twentymin_urls:
2973             return self.playlist_from_matches(
2974                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
2975
2976         # Look for Openload embeds
2977         openload_urls = OpenloadIE._extract_urls(webpage)
2978         if openload_urls:
2979             return self.playlist_from_matches(
2980                 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
2981
2982         # Look for VideoPress embeds
2983         videopress_urls = VideoPressIE._extract_urls(webpage)
2984         if videopress_urls:
2985             return self.playlist_from_matches(
2986                 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
2987
2988         # Look for Rutube embeds
2989         rutube_urls = RutubeIE._extract_urls(webpage)
2990         if rutube_urls:
2991             return self.playlist_from_matches(
2992                 rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
2993
2994         # Look for WashingtonPost embeds
2995         wapo_urls = WashingtonPostIE._extract_urls(webpage)
2996         if wapo_urls:
2997             return self.playlist_from_matches(
2998                 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
2999
3000         # Look for Mediaset embeds
3001         mediaset_urls = MediasetIE._extract_urls(webpage)
3002         if mediaset_urls:
3003             return self.playlist_from_matches(
3004                 mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
3005
3006         # Look for JOJ.sk embeds
3007         joj_urls = JojIE._extract_urls(webpage)
3008         if joj_urls:
3009             return self.playlist_from_matches(
3010                 joj_urls, video_id, video_title, ie=JojIE.ie_key())
3011
3012         # Look for megaphone.fm embeds
3013         mpfn_urls = MegaphoneIE._extract_urls(webpage)
3014         if mpfn_urls:
3015             return self.playlist_from_matches(
3016                 mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
3017
3018         # Look for vzaar embeds
3019         vzaar_urls = VzaarIE._extract_urls(webpage)
3020         if vzaar_urls:
3021             return self.playlist_from_matches(
3022                 vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
3023
3024         channel9_urls = Channel9IE._extract_urls(webpage)
3025         if channel9_urls:
3026             return self.playlist_from_matches(
3027                 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
3028
3029         vshare_urls = VShareIE._extract_urls(webpage)
3030         if vshare_urls:
3031             return self.playlist_from_matches(
3032                 vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
3033
3034         # Look for Mediasite embeds
3035         mediasite_urls = MediasiteIE._extract_urls(webpage)
3036         if mediasite_urls:
3037             entries = [
3038                 self.url_result(smuggle_url(
3039                     compat_urlparse.urljoin(url, mediasite_url),
3040                     {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
3041                 for mediasite_url in mediasite_urls]
3042             return self.playlist_result(entries, video_id, video_title)
3043
3044         springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
3045         if springboardplatform_urls:
3046             return self.playlist_from_matches(
3047                 springboardplatform_urls, video_id, video_title,
3048                 ie=SpringboardPlatformIE.ie_key())
3049
3050         yapfiles_urls = YapFilesIE._extract_urls(webpage)
3051         if yapfiles_urls:
3052             return self.playlist_from_matches(
3053                 yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
3054
3055         vice_urls = ViceIE._extract_urls(webpage)
3056         if vice_urls:
3057             return self.playlist_from_matches(
3058                 vice_urls, video_id, video_title, ie=ViceIE.ie_key())
3059
3060         xfileshare_urls = XFileShareIE._extract_urls(webpage)
3061         if xfileshare_urls:
3062             return self.playlist_from_matches(
3063                 xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
3064
3065         cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
3066         if cloudflarestream_urls:
3067             return self.playlist_from_matches(
3068                 cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
3069
3070         peertube_urls = PeerTubeIE._extract_urls(webpage, url)
3071         if peertube_urls:
3072             return self.playlist_from_matches(
3073                 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
3074
3075         indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
3076         if indavideo_urls:
3077             return self.playlist_from_matches(
3078                 indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
3079
3080         apa_urls = APAIE._extract_urls(webpage)
3081         if apa_urls:
3082             return self.playlist_from_matches(
3083                 apa_urls, video_id, video_title, ie=APAIE.ie_key())
3084
3085         foxnews_urls = FoxNewsIE._extract_urls(webpage)
3086         if foxnews_urls:
3087             return self.playlist_from_matches(
3088                 foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
3089
3090         sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
3091             r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
3092             webpage)]
3093         if sharevideos_urls:
3094             return self.playlist_from_matches(
3095                 sharevideos_urls, video_id, video_title)
3096
3097         # Look for HTML5 media
3098         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
3099         if entries:
3100             if len(entries) == 1:
3101                 entries[0].update({
3102                     'id': video_id,
3103                     'title': video_title,
3104                 })
3105             else:
3106                 for num, entry in enumerate(entries, start=1):
3107                     entry.update({
3108                         'id': '%s-%s' % (video_id, num),
3109                         'title': '%s (%d)' % (video_title, num),
3110                     })
3111             for entry in entries:
3112                 self._sort_formats(entry['formats'])
3113             return self.playlist_result(entries, video_id, video_title)
3114
3115         jwplayer_data = self._find_jwplayer_data(
3116             webpage, video_id, transform_source=js_to_json)
3117         if jwplayer_data:
3118             info = self._parse_jwplayer_data(
3119                 jwplayer_data, video_id, require_title=False, base_url=url)
3120             return merge_dicts(info, info_dict)
3121
3122         # Video.js embed
3123         mobj = re.search(
3124             r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
3125             webpage)
3126         if mobj is not None:
3127             sources = self._parse_json(
3128                 mobj.group(1), video_id, transform_source=js_to_json,
3129                 fatal=False) or []
3130             if not isinstance(sources, list):
3131                 sources = [sources]
3132             formats = []
3133             for source in sources:
3134                 src = url_or_none(source.get('src'))
3135                 if not src:
3136                     continue
3137                 src = compat_urlparse.urljoin(url, src)
3138                 src_type = source.get('type')
3139                 if isinstance(src_type, compat_str):
3140                     src_type = src_type.lower()
3141                 ext = determine_ext(src).lower()
3142                 if src_type == 'video/youtube':
3143                     return self.url_result(src, YoutubeIE.ie_key())
3144                 if src_type == 'application/dash+xml' or ext == 'mpd':
3145                     formats.extend(self._extract_mpd_formats(
3146                         src, video_id, mpd_id='dash', fatal=False))
3147                 elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
3148                     formats.extend(self._extract_m3u8_formats(
3149                         src, video_id, 'mp4', entry_protocol='m3u8_native',
3150                         m3u8_id='hls', fatal=False))
3151                 else:
3152                     formats.append({
3153                         'url': src,
3154                         'ext': (mimetype2ext(src_type) or
3155                                 ext if ext in KNOWN_EXTENSIONS else 'mp4'),
3156                     })
3157             if formats:
3158                 self._sort_formats(formats)
3159                 info_dict['formats'] = formats
3160                 return info_dict
3161
3162         # Looking for http://schema.org/VideoObject
3163         json_ld = self._search_json_ld(
3164             webpage, video_id, default={}, expected_type='VideoObject')
3165         if json_ld.get('url'):
3166             return merge_dicts(json_ld, info_dict)
3167
3168         def check_video(vurl):
3169             if YoutubeIE.suitable(vurl):
3170                 return True
3171             if RtmpIE.suitable(vurl):
3172                 return True
3173             vpath = compat_urlparse.urlparse(vurl).path
3174             vext = determine_ext(vpath)
3175             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
3176
3177         def filter_video(urls):
3178             return list(filter(check_video, urls))
3179
3180         # Start with something easy: JW Player in SWFObject
3181         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
3182         if not found:
3183             # Look for gorilla-vid style embedding
3184             found = filter_video(re.findall(r'''(?sx)
3185                 (?:
3186                     jw_plugins|
3187                     JWPlayerOptions|
3188                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
3189                 )
3190                 .*?
3191                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
3192         if not found:
3193             # Broaden the search a little bit
3194             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
3195         if not found:
3196             # Broaden the findall a little bit: JWPlayer JS loader
3197             found = filter_video(re.findall(
3198                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
3199         if not found:
3200             # Flow player
3201             found = filter_video(re.findall(r'''(?xs)
3202                 flowplayer\("[^"]+",\s*
3203                     \{[^}]+?\}\s*,
3204                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
3205                         ["']?url["']?\s*:\s*["']([^"']+)["']
3206             ''', webpage))
3207         if not found:
3208             # Cinerama player
3209             found = re.findall(
3210                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
3211         if not found:
3212             # Try to find twitter cards info
3213             # twitter:player:stream should be checked before twitter:player since
3214             # it is expected to contain a raw stream (see
3215             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3216             found = filter_video(re.findall(
3217                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
3218         if not found:
3219             # We look for Open Graph info:
3220             # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
3221             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
3222             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
3223             if m_video_type is not None:
3224                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
3225         if not found:
3226             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
3227             found = re.search(
3228                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
3229                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
3230                 webpage)
3231             if not found:
3232                 # Look also in Refresh HTTP header
3233                 refresh_header = head_response.headers.get('Refresh')
3234                 if refresh_header:
3235                     # In python 2 response HTTP headers are bytestrings
3236                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
3237                         refresh_header = refresh_header.decode('iso-8859-1')
3238                     found = re.search(REDIRECT_REGEX, refresh_header)
3239             if found:
3240                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
3241                 if new_url != url:
3242                     self.report_following_redirect(new_url)
3243                     return {
3244                         '_type': 'url',
3245                         'url': new_url,
3246                     }
3247                 else:
3248                     found = None
3249
3250         if not found:
3251             # twitter:player is an HTTPS URL to an iframe player that may or may not
3252             # be supported by youtube-dl, thus it is checked last of all (see
3253             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
3254             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
3255             if embed_url and embed_url != url:
3256                 return self.url_result(embed_url)
3257
3258         if not found:
3259             raise UnsupportedError(url)
3260
3261         entries = []
3262         for video_url in orderedSet(found):
3263             video_url = unescapeHTML(video_url)
3264             video_url = video_url.replace('\\/', '/')
3265             video_url = compat_urlparse.urljoin(url, video_url)
3266             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
3267
3268             # Sometimes, jwplayer extraction will result in a YouTube URL
3269             if YoutubeIE.suitable(video_url):
3270                 entries.append(self.url_result(video_url, 'Youtube'))
3271                 continue
3272
3273             # Drop the file extension from the basename to get the video id
3274             video_id = os.path.splitext(video_id)[0]
3275
3276             entry_info_dict = {
3277                 'id': video_id,
3278                 'uploader': video_uploader,
3279                 'title': video_title,
3280                 'age_limit': age_limit,
3281             }
3282
3283             if RtmpIE.suitable(video_url):
3284                 entry_info_dict.update({
3285                     '_type': 'url_transparent',
3286                     'ie_key': RtmpIE.ie_key(),
3287                     'url': video_url,
3288                 })
3289                 entries.append(entry_info_dict)
3290                 continue
3291
3292             ext = determine_ext(video_url)
3293             if ext == 'smil':
3294                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
3295             elif ext == 'xspf':
3296                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
3297             elif ext == 'm3u8':
3298                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
3299             elif ext == 'mpd':
3300                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
3301             elif ext == 'f4m':
3302                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
3303             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
3304                 # Just matching .ism/manifest is not enough to be reliably sure
3305                 # whether it's actually an ISM manifest or some other streaming
3306                 # manifest since there are various streaming URL formats
3307                 # possible (see [1]) as well as some other shenanigans like
3308                 # .smil/manifest URLs that actually serve an ISM (see [2]) and
3309                 # so on.
3310                 # Thus the most reasonable way to solve this is to delegate
3311                 # to generic extractor in order to look into the contents of
3312                 # the manifest itself.
3313                 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
3314                 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
3315                 entry_info_dict = self.url_result(
3316                     smuggle_url(video_url, {'to_generic': True}),
3317                     GenericIE.ie_key())
3318             else:
3319                 entry_info_dict['url'] = video_url
3320
3321             if entry_info_dict.get('formats'):
3322                 self._sort_formats(entry_info_dict['formats'])
3323
3324             entries.append(entry_info_dict)
3325
3326         if len(entries) == 1:
3327             return entries[0]
3328         else:
3329             for num, e in enumerate(entries, start=1):
3330                 # 'url' results don't have a title
3331                 if e.get('title') is not None:
3332                     e['title'] = '%s (%d)' % (e['title'], num)
3333             return {
3334                 '_type': 'playlist',
3335                 'entries': entries,
3336             }