[condenast] improve embed support
[youtube-dl] / youtube_dl / extractor / generic.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import os
6 import re
7 import sys
8
9 from .common import InfoExtractor
10 from .youtube import YoutubeIE
11 from ..compat import (
12     compat_etree_fromstring,
13     compat_urllib_parse_unquote,
14     compat_urlparse,
15     compat_xml_parse_error,
16 )
17 from ..utils import (
18     determine_ext,
19     ExtractorError,
20     float_or_none,
21     HEADRequest,
22     is_html,
23     js_to_json,
24     orderedSet,
25     sanitized_Request,
26     smuggle_url,
27     unescapeHTML,
28     unified_strdate,
29     unsmuggle_url,
30     UnsupportedError,
31     xpath_text,
32 )
33 from .commonprotocols import RtmpIE
34 from .brightcove import (
35     BrightcoveLegacyIE,
36     BrightcoveNewIE,
37 )
38 from .nbc import NBCSportsVPlayerIE
39 from .ooyala import OoyalaIE
40 from .rutv import RUTVIE
41 from .tvc import TVCIE
42 from .sportbox import SportBoxEmbedIE
43 from .smotri import SmotriIE
44 from .myvi import MyviIE
45 from .condenast import CondeNastIE
46 from .udn import UDNEmbedIE
47 from .senateisvp import SenateISVPIE
48 from .svt import SVTIE
49 from .pornhub import PornHubIE
50 from .xhamster import XHamsterEmbedIE
51 from .tnaflix import TNAFlixNetworkEmbedIE
52 from .drtuber import DrTuberIE
53 from .redtube import RedTubeIE
54 from .vimeo import VimeoIE
55 from .dailymotion import (
56     DailymotionIE,
57     DailymotionCloudIE,
58 )
59 from .onionstudios import OnionStudiosIE
60 from .viewlift import ViewLiftEmbedIE
61 from .mtv import MTVServicesEmbeddedIE
62 from .pladform import PladformIE
63 from .videomore import VideomoreIE
64 from .webcaster import WebcasterFeedIE
65 from .googledrive import GoogleDriveIE
66 from .jwplatform import JWPlatformIE
67 from .digiteka import DigitekaIE
68 from .arkena import ArkenaIE
69 from .instagram import InstagramIE
70 from .liveleak import LiveLeakIE
71 from .threeqsdn import ThreeQSDNIE
72 from .theplatform import ThePlatformIE
73 from .vessel import VesselIE
74 from .kaltura import KalturaIE
75 from .eagleplatform import EaglePlatformIE
76 from .facebook import FacebookIE
77 from .soundcloud import SoundcloudIE
78 from .tunein import TuneInBaseIE
79 from .vbox7 import Vbox7IE
80 from .dbtv import DBTVIE
81 from .piksel import PikselIE
82 from .videa import VideaIE
83 from .twentymin import TwentyMinutenIE
84 from .ustream import UstreamIE
85 from .openload import OpenloadIE
86 from .videopress import VideoPressIE
87 from .rutube import RutubeIE
88 from .limelight import LimelightBaseIE
89 from .anvato import AnvatoIE
90 from .washingtonpost import WashingtonPostIE
91
92
93 class GenericIE(InfoExtractor):
94     IE_DESC = 'Generic downloader that works on some sites'
95     _VALID_URL = r'.*'
96     IE_NAME = 'generic'
97     _TESTS = [
98         # Direct link to a video
99         {
100             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
101             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
102             'info_dict': {
103                 'id': 'trailer',
104                 'ext': 'mp4',
105                 'title': 'trailer',
106                 'upload_date': '20100513',
107             }
108         },
109         # Direct link to media delivered compressed (until Accept-Encoding is *)
110         {
111             'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
112             'md5': '128c42e68b13950268b648275386fc74',
113             'info_dict': {
114                 'id': 'FictionJunction-Parallel_Hearts',
115                 'ext': 'flac',
116                 'title': 'FictionJunction-Parallel_Hearts',
117                 'upload_date': '20140522',
118             },
119             'expected_warnings': [
120                 'URL could be a direct video link, returning it as such.'
121             ],
122             'skip': 'URL invalid',
123         },
124         # Direct download with broken HEAD
125         {
126             'url': 'http://ai-radio.org:8000/radio.opus',
127             'info_dict': {
128                 'id': 'radio',
129                 'ext': 'opus',
130                 'title': 'radio',
131             },
132             'params': {
133                 'skip_download': True,  # infinite live stream
134             },
135             'expected_warnings': [
136                 r'501.*Not Implemented',
137                 r'400.*Bad Request',
138             ],
139         },
140         # Direct link with incorrect MIME type
141         {
142             'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
143             'md5': '4ccbebe5f36706d85221f204d7eb5913',
144             'info_dict': {
145                 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
146                 'id': '5_Lennart_Poettering_-_Systemd',
147                 'ext': 'webm',
148                 'title': '5_Lennart_Poettering_-_Systemd',
149                 'upload_date': '20141120',
150             },
151             'expected_warnings': [
152                 'URL could be a direct video link, returning it as such.'
153             ]
154         },
155         # RSS feed
156         {
157             'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
158             'info_dict': {
159                 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
160                 'title': 'Zero Punctuation',
161                 'description': 're:.*groundbreaking video review series.*'
162             },
163             'playlist_mincount': 11,
164         },
165         # RSS feed with enclosure
166         {
167             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
168             'info_dict': {
169                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
170                 'ext': 'm4v',
171                 'upload_date': '20150228',
172                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
173             }
174         },
175         # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
176         {
177             'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
178             'info_dict': {
179                 'id': 'smil',
180                 'ext': 'mp4',
181                 'title': 'Automatics, robotics and biocybernetics',
182                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
183                 'upload_date': '20130627',
184                 'formats': 'mincount:16',
185                 'subtitles': 'mincount:1',
186             },
187             'params': {
188                 'force_generic_extractor': True,
189                 'skip_download': True,
190             },
191         },
192         # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
193         {
194             'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
195             'info_dict': {
196                 'id': 'hds',
197                 'ext': 'flv',
198                 'title': 'hds',
199                 'formats': 'mincount:1',
200             },
201             'params': {
202                 'skip_download': True,
203             },
204         },
205         # SMIL from https://www.restudy.dk/video/play/id/1637
206         {
207             'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
208             'info_dict': {
209                 'id': 'video_1637',
210                 'ext': 'flv',
211                 'title': 'video_1637',
212                 'formats': 'mincount:3',
213             },
214             'params': {
215                 'skip_download': True,
216             },
217         },
218         # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
219         {
220             'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
221             'info_dict': {
222                 'id': 'smil-service',
223                 'ext': 'flv',
224                 'title': 'smil-service',
225                 'formats': 'mincount:1',
226             },
227             'params': {
228                 'skip_download': True,
229             },
230         },
231         # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
232         {
233             'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
234             'info_dict': {
235                 'id': '4719370',
236                 'ext': 'mp4',
237                 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
238                 'formats': 'mincount:3',
239             },
240             'params': {
241                 'skip_download': True,
242             },
243         },
244         # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
245         {
246             'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
247             'info_dict': {
248                 'id': 'mZlp2ctYIUEB',
249                 'ext': 'mp4',
250                 'title': 'Tikibad ontruimd wegens brand',
251                 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
252                 'thumbnail': r're:^https?://.*\.jpg$',
253                 'duration': 33,
254             },
255             'params': {
256                 'skip_download': True,
257             },
258         },
259         # MPD from http://dash-mse-test.appspot.com/media.html
260         {
261             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
262             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
263             'info_dict': {
264                 'id': 'car-20120827-manifest',
265                 'ext': 'mp4',
266                 'title': 'car-20120827-manifest',
267                 'formats': 'mincount:9',
268                 'upload_date': '20130904',
269             },
270             'params': {
271                 'format': 'bestvideo',
272             },
273         },
274         # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
275         {
276             'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
277             'info_dict': {
278                 'id': 'content',
279                 'ext': 'mp4',
280                 'title': 'content',
281                 'formats': 'mincount:8',
282             },
283             'params': {
284                 # m3u8 downloads
285                 'skip_download': True,
286             },
287             'skip': 'video gone',
288         },
289         # m3u8 served with Content-Type: text/plain
290         {
291             'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
292             'info_dict': {
293                 'id': 'index',
294                 'ext': 'mp4',
295                 'title': 'index',
296                 'upload_date': '20140720',
297                 'formats': 'mincount:11',
298             },
299             'params': {
300                 # m3u8 downloads
301                 'skip_download': True,
302             },
303             'skip': 'video gone',
304         },
305         # google redirect
306         {
307             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
308             'info_dict': {
309                 'id': 'cmQHVoWB5FY',
310                 'ext': 'mp4',
311                 'upload_date': '20130224',
312                 'uploader_id': 'TheVerge',
313                 'description': r're:^Chris Ziegler takes a look at the\.*',
314                 'uploader': 'The Verge',
315                 'title': 'First Firefox OS phones side-by-side',
316             },
317             'params': {
318                 'skip_download': False,
319             }
320         },
321         {
322             # redirect in Refresh HTTP header
323             'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
324             'info_dict': {
325                 'id': 'pO8h3EaFRdo',
326                 'ext': 'mp4',
327                 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
328                 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
329                 'upload_date': '20150917',
330                 'uploader_id': 'brtvofficial',
331                 'uploader': 'Boiler Room',
332             },
333             'params': {
334                 'skip_download': False,
335             },
336         },
337         {
338             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
339             'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
340             'info_dict': {
341                 'id': '13601338388002',
342                 'ext': 'mp4',
343                 'uploader': 'www.hodiho.fr',
344                 'title': 'R\u00e9gis plante sa Jeep',
345             }
346         },
347         # bandcamp page with custom domain
348         {
349             'add_ie': ['Bandcamp'],
350             'url': 'http://bronyrock.com/track/the-pony-mash',
351             'info_dict': {
352                 'id': '3235767654',
353                 'ext': 'mp3',
354                 'title': 'The Pony Mash',
355                 'uploader': 'M_Pallante',
356             },
357             'skip': 'There is a limit of 200 free downloads / month for the test song',
358         },
359         {
360             # embedded brightcove video
361             # it also tests brightcove videos that need to set the 'Referer'
362             # in the http requests
363             'add_ie': ['BrightcoveLegacy'],
364             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
365             'info_dict': {
366                 'id': '2765128793001',
367                 'ext': 'mp4',
368                 'title': 'Le cours de bourse : l’analyse technique',
369                 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
370                 'uploader': 'BFM BUSINESS',
371             },
372             'params': {
373                 'skip_download': True,
374             },
375         },
376         {
377             # embedded with itemprop embedURL and video id spelled as `idVideo`
378             'add_id': ['BrightcoveLegacy'],
379             'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
380             'info_dict': {
381                 'id': '5255628253001',
382                 'ext': 'mp4',
383                 'title': 'md5:37c519b1128915607601e75a87995fc0',
384                 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
385                 'uploader': 'BFM BUSINESS',
386                 'uploader_id': '876450612001',
387                 'timestamp': 1482255315,
388                 'upload_date': '20161220',
389             },
390             'params': {
391                 'skip_download': True,
392             },
393         },
394         {
395             # https://github.com/rg3/youtube-dl/issues/2253
396             'url': 'http://bcove.me/i6nfkrc3',
397             'md5': '0ba9446db037002366bab3b3eb30c88c',
398             'info_dict': {
399                 'id': '3101154703001',
400                 'ext': 'mp4',
401                 'title': 'Still no power',
402                 'uploader': 'thestar.com',
403                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
404             },
405             'add_ie': ['BrightcoveLegacy'],
406             'skip': 'video gone',
407         },
408         {
409             'url': 'http://www.championat.com/video/football/v/87/87499.html',
410             'md5': 'fb973ecf6e4a78a67453647444222983',
411             'info_dict': {
412                 'id': '3414141473001',
413                 'ext': 'mp4',
414                 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
415                 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
416                 'uploader': 'Championat',
417             },
418         },
419         {
420             # https://github.com/rg3/youtube-dl/issues/3541
421             'add_ie': ['BrightcoveLegacy'],
422             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
423             'info_dict': {
424                 'id': '3866516442001',
425                 'ext': 'mp4',
426                 'title': 'Leer mij vrouwen kennen: Aflevering 1',
427                 'description': 'Leer mij vrouwen kennen: Aflevering 1',
428                 'uploader': 'SBS Broadcasting',
429             },
430             'skip': 'Restricted to Netherlands',
431             'params': {
432                 'skip_download': True,  # m3u8 download
433             },
434         },
435         {
436             # Brightcove video in <iframe>
437             'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
438             'md5': '36d74ef5e37c8b4a2ce92880d208b968',
439             'info_dict': {
440                 'id': '5360463607001',
441                 'ext': 'mp4',
442                 'title': '叙利亚失明儿童在废墟上演唱《心跳》  呼吁获得正常童年生活',
443                 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
444                 'uploader': 'United Nations',
445                 'uploader_id': '1362235914001',
446                 'timestamp': 1489593889,
447                 'upload_date': '20170315',
448             },
449             'add_ie': ['BrightcoveLegacy'],
450         },
451         {
452             # Brightcove with alternative playerID key
453             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
454             'info_dict': {
455                 'id': 'nmeth.2062_SV1',
456                 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
457             },
458             'playlist': [{
459                 'info_dict': {
460                     'id': '2228375078001',
461                     'ext': 'mp4',
462                     'title': 'nmeth.2062-sv1',
463                     'description': 'nmeth.2062-sv1',
464                     'timestamp': 1363357591,
465                     'upload_date': '20130315',
466                     'uploader': 'Nature Publishing Group',
467                     'uploader_id': '1964492299001',
468                 },
469             }],
470         },
471         {
472             # Brightcove with UUID in videoPlayer
473             'url': 'http://www8.hp.com/cn/zh/home.html',
474             'info_dict': {
475                 'id': '5255815316001',
476                 'ext': 'mp4',
477                 'title': 'Sprocket Video - China',
478                 'description': 'Sprocket Video - China',
479                 'uploader': 'HP-Video Gallery',
480                 'timestamp': 1482263210,
481                 'upload_date': '20161220',
482                 'uploader_id': '1107601872001',
483             },
484             'params': {
485                 'skip_download': True,  # m3u8 download
486             },
487             'skip': 'video rotates...weekly?',
488         },
489         {
490             # Brightcove:new type [2].
491             'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
492             'md5': '2b35148fcf48da41c9fb4591650784f3',
493             'info_dict': {
494                 'id': '5348741021001',
495                 'ext': 'mp4',
496                 'upload_date': '20170306',
497                 'uploader_id': '4191638492001',
498                 'timestamp': 1488769918,
499                 'title': 'VIDEO:  St. Thomas More earns first trip to basketball semis',
500
501             },
502         },
503         {
504             # Alternative brightcove <video> attributes
505             'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
506             'info_dict': {
507                 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
508                 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
509             },
510             'playlist': [{
511                 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
512                 'info_dict': {
513                     'id': '5311302538001',
514                     'ext': 'mp4',
515                     'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
516                     'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
517                     'timestamp': 1486321708,
518                     'upload_date': '20170205',
519                     'uploader_id': '800000640001',
520                 },
521                 'only_matching': True,
522             }],
523         },
524         {
525             # Brightcove with UUID in videoPlayer
526             'url': 'http://www8.hp.com/cn/zh/home.html',
527             'info_dict': {
528                 'id': '5255815316001',
529                 'ext': 'mp4',
530                 'title': 'Sprocket Video - China',
531                 'description': 'Sprocket Video - China',
532                 'uploader': 'HP-Video Gallery',
533                 'timestamp': 1482263210,
534                 'upload_date': '20161220',
535                 'uploader_id': '1107601872001',
536             },
537             'params': {
538                 'skip_download': True,  # m3u8 download
539             },
540         },
541         # ooyala video
542         {
543             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
544             'md5': '166dd577b433b4d4ebfee10b0824d8ff',
545             'info_dict': {
546                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
547                 'ext': 'mp4',
548                 'title': '2cc213299525360.mov',  # that's what we get
549                 'duration': 238.231,
550             },
551             'add_ie': ['Ooyala'],
552         },
553         {
554             # ooyala video embedded with http://player.ooyala.com/iframe.js
555             'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
556             'info_dict': {
557                 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
558                 'ext': 'mp4',
559                 'title': '"Steve Jobs: Man in the Machine" trailer',
560                 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
561                 'duration': 135.427,
562             },
563             'params': {
564                 'skip_download': True,
565             },
566             'skip': 'movie expired',
567         },
568         # embed.ly video
569         {
570             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
571             'info_dict': {
572                 'id': '9ODmcdjQcHQ',
573                 'ext': 'mp4',
574                 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
575                 'upload_date': '20140225',
576                 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
577                 'uploader': 'Tested',
578                 'uploader_id': 'testedcom',
579             },
580             # No need to test YoutubeIE here
581             'params': {
582                 'skip_download': True,
583             },
584         },
585         # funnyordie embed
586         {
587             'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
588             'info_dict': {
589                 'id': '18e820ec3f',
590                 'ext': 'mp4',
591                 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
592                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
593             },
594             # HEAD requests lead to endless 301, while GET is OK
595             'expected_warnings': ['301'],
596         },
597         # RUTV embed
598         {
599             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
600             'info_dict': {
601                 'id': '776940',
602                 'ext': 'mp4',
603                 'title': 'Охотское море стало целиком российским',
604                 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
605             },
606             'params': {
607                 # m3u8 download
608                 'skip_download': True,
609             },
610         },
611         # TVC embed
612         {
613             'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
614             'info_dict': {
615                 'id': '55304',
616                 'ext': 'mp4',
617                 'title': 'Дошкольное воспитание',
618             },
619         },
620         # SportBox embed
621         {
622             'url': 'http://www.vestifinance.ru/articles/25753',
623             'info_dict': {
624                 'id': '25753',
625                 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
626             },
627             'playlist': [{
628                 'info_dict': {
629                     'id': '370908',
630                     'title': 'Госзаказ. День 3',
631                     'ext': 'mp4',
632                 }
633             }, {
634                 'info_dict': {
635                     'id': '370905',
636                     'title': 'Госзаказ. День 2',
637                     'ext': 'mp4',
638                 }
639             }, {
640                 'info_dict': {
641                     'id': '370902',
642                     'title': 'Госзаказ. День 1',
643                     'ext': 'mp4',
644                 }
645             }],
646             'params': {
647                 # m3u8 download
648                 'skip_download': True,
649             },
650         },
651         # Myvi.ru embed
652         {
653             'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
654             'info_dict': {
655                 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
656                 'ext': 'mp4',
657                 'title': 'Ужастики, русский трейлер (2015)',
658                 'thumbnail': r're:^https?://.*\.jpg$',
659                 'duration': 153,
660             }
661         },
662         # XHamster embed
663         {
664             'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
665             'info_dict': {
666                 'id': 'showthread',
667                 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
668             },
669             'playlist_mincount': 7,
670             # This forum does not allow <iframe> syntaxes anymore
671             # Now HTML tags are displayed as-is
672             'skip': 'No videos on this page',
673         },
674         # Embedded TED video
675         {
676             'url': 'http://en.support.wordpress.com/videos/ted-talks/',
677             'md5': '65fdff94098e4a607385a60c5177c638',
678             'info_dict': {
679                 'id': '1969',
680                 'ext': 'mp4',
681                 'title': 'Hidden miracles of the natural world',
682                 'uploader': 'Louie Schwartzberg',
683                 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
684             }
685         },
686         # nowvideo embed hidden behind percent encoding
687         {
688             'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
689             'md5': '2baf4ddd70f697d94b1c18cf796d5107',
690             'info_dict': {
691                 'id': '06e53103ca9aa',
692                 'ext': 'flv',
693                 'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
694                 'description': 'No description',
695             },
696         },
697         # arte embed
698         {
699             'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
700             'md5': '7653032cbb25bf6c80d80f217055fa43',
701             'info_dict': {
702                 'id': '048195-004_PLUS7-F',
703                 'ext': 'flv',
704                 'title': 'X:enius',
705                 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
706                 'upload_date': '20140320',
707             },
708             'params': {
709                 'skip_download': 'Requires rtmpdump'
710             },
711             'skip': 'video gone',
712         },
713         # francetv embed
714         {
715             'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
716             'info_dict': {
717                 'id': 'EV_30231',
718                 'ext': 'mp4',
719                 'title': 'Alcaline, le concert avec Calogero',
720                 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
721                 'upload_date': '20150226',
722                 'timestamp': 1424989860,
723                 'duration': 5400,
724             },
725             'params': {
726                 # m3u8 downloads
727                 'skip_download': True,
728             },
729             'expected_warnings': [
730                 'Forbidden'
731             ]
732         },
733         # Condé Nast embed
734         {
735             'url': 'http://www.wired.com/2014/04/honda-asimo/',
736             'md5': 'ba0dfe966fa007657bd1443ee672db0f',
737             'info_dict': {
738                 'id': '53501be369702d3275860000',
739                 'ext': 'mp4',
740                 'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
741             }
742         },
743         # Dailymotion embed
744         {
745             'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
746             'md5': '441aeeb82eb72c422c7f14ec533999cd',
747             'info_dict': {
748                 'id': 'k2mm4bCdJ6CQ2i7c8o2',
749                 'ext': 'mp4',
750                 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
751                 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
752                 'uploader': 'Spi0n',
753                 'uploader_id': 'xgditw',
754                 'upload_date': '20140425',
755                 'timestamp': 1398441542,
756             },
757             'add_ie': ['Dailymotion'],
758         },
759         # YouTube embed
760         {
761             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
762             'info_dict': {
763                 'id': 'FXRb4ykk4S0',
764                 'ext': 'mp4',
765                 'title': 'The NBL Auction 2014',
766                 'uploader': 'BADMINTON England',
767                 'uploader_id': 'BADMINTONEvents',
768                 'upload_date': '20140603',
769                 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
770             },
771             'add_ie': ['Youtube'],
772             'params': {
773                 'skip_download': True,
774             }
775         },
776         # MTVServices embed
777         {
778             'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
779             'md5': 'ca1aef97695ef2c1d6973256a57e5252',
780             'info_dict': {
781                 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
782                 'ext': 'mp4',
783                 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
784                 'description': 'Two valets share their love for movie star Liam Neesons.',
785                 'timestamp': 1349922600,
786                 'upload_date': '20121011',
787             },
788         },
789         # YouTube embed via <data-embed-url="">
790         {
791             'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
792             'info_dict': {
793                 'id': '4vAffPZIT44',
794                 'ext': 'mp4',
795                 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
796                 'uploader': 'Gameloft',
797                 'uploader_id': 'gameloft',
798                 'upload_date': '20140828',
799                 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
800             },
801             'params': {
802                 'skip_download': True,
803             }
804         },
805         # YouTube <object> embed
806         {
807             'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
808             'md5': '516718101ec834f74318df76259fb3cc',
809             'info_dict': {
810                 'id': 'msN87y-iEx0',
811                 'ext': 'webm',
812                 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
813                 'upload_date': '20080526',
814                 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
815                 'uploader': 'Christopher Sykes',
816                 'uploader_id': 'ChristopherJSykes',
817             },
818             'add_ie': ['Youtube'],
819         },
820         # Camtasia studio
821         {
822             'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
823             'playlist': [{
824                 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
825                 'info_dict': {
826                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
827                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
828                     'ext': 'flv',
829                     'duration': 2235.90,
830                 }
831             }, {
832                 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
833                 'info_dict': {
834                     'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
835                     'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
836                     'ext': 'flv',
837                     'duration': 2235.93,
838                 }
839             }],
840             'info_dict': {
841                 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
842             }
843         },
844         # Flowplayer
845         {
846             'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
847             'md5': '9d65602bf31c6e20014319c7d07fba27',
848             'info_dict': {
849                 'id': '5123ea6d5e5a7',
850                 'ext': 'mp4',
851                 'age_limit': 18,
852                 'uploader': 'www.handjobhub.com',
853                 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
854             }
855         },
856         # Multiple brightcove videos
857         # https://github.com/rg3/youtube-dl/issues/2283
858         {
859             'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
860             'info_dict': {
861                 'id': 'always-never',
862                 'title': 'Always / Never - The New Yorker',
863             },
864             'playlist_count': 3,
865             'params': {
866                 'extract_flat': False,
867                 'skip_download': True,
868             }
869         },
870         # MLB embed
871         {
872             'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
873             'md5': '96f09a37e44da40dd083e12d9a683327',
874             'info_dict': {
875                 'id': '33322633',
876                 'ext': 'mp4',
877                 'title': 'Ump changes call to ball',
878                 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
879                 'duration': 48,
880                 'timestamp': 1401537900,
881                 'upload_date': '20140531',
882                 'thumbnail': r're:^https?://.*\.jpg$',
883             },
884         },
885         # Wistia embed
886         {
887             'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
888             'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
889             'info_dict': {
890                 'id': '6e2wtrbdaf',
891                 'ext': 'mov',
892                 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
893                 'description': 'a Paywall Videos video from Remilon',
894                 'duration': 644.072,
895                 'uploader': 'study.com',
896                 'timestamp': 1459678540,
897                 'upload_date': '20160403',
898                 'filesize': 24687186,
899             },
900         },
901         {
902             'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
903             'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
904             'info_dict': {
905                 'id': 'uxjb0lwrcz',
906                 'ext': 'mp4',
907                 'title': 'Conversation about Hexagonal Rails Part 1',
908                 'description': 'a Martin Fowler video from ThoughtWorks',
909                 'duration': 1715.0,
910                 'uploader': 'thoughtworks.wistia.com',
911                 'timestamp': 1401832161,
912                 'upload_date': '20140603',
913             },
914         },
915         # Wistia standard embed (async)
916         {
917             'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
918             'info_dict': {
919                 'id': '807fafadvk',
920                 'ext': 'mp4',
921                 'title': 'Drip Brennan Dunn Workshop',
922                 'description': 'a JV Webinars video from getdrip-1',
923                 'duration': 4986.95,
924                 'timestamp': 1463607249,
925                 'upload_date': '20160518',
926             },
927             'params': {
928                 'skip_download': True,
929             }
930         },
931         # Soundcloud embed
932         {
933             'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
934             'info_dict': {
935                 'id': '174391317',
936                 'ext': 'mp3',
937                 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
938                 'uploader': 'Sophos Security',
939                 'title': 'Chet Chat 171 - Oct 29, 2014',
940                 'upload_date': '20141029',
941             }
942         },
943         # Soundcloud multiple embeds
944         {
945             'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
946             'info_dict': {
947                 'id': '52809',
948                 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance  | TAB + AUDIO',
949             },
950             'playlist_mincount': 7,
951         },
952         # TuneIn station embed
953         {
954             'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
955             'info_dict': {
956                 'id': '204146',
957                 'ext': 'mp3',
958                 'title': 'CNRV',
959                 'location': 'Paris, France',
960                 'is_live': True,
961             },
962             'params': {
963                 # Live stream
964                 'skip_download': True,
965             },
966         },
967         # Livestream embed
968         {
969             'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
970             'info_dict': {
971                 'id': '67864563',
972                 'ext': 'flv',
973                 'upload_date': '20141112',
974                 'title': 'Rosetta #CometLanding webcast HL 10',
975             }
976         },
977         # Another Livestream embed, without 'new.' in URL
978         {
979             'url': 'https://www.freespeech.org/',
980             'info_dict': {
981                 'id': '123537347',
982                 'ext': 'mp4',
983                 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
984             },
985             'params': {
986                 # Live stream
987                 'skip_download': True,
988             },
989         },
990         # LazyYT
991         {
992             'url': 'https://skiplagged.com/',
993             'info_dict': {
994                 'id': 'skiplagged',
995                 'title': 'Skiplagged: The smart way to find cheap flights',
996             },
997             'playlist_mincount': 1,
998             'add_ie': ['Youtube'],
999         },
1000         # Cinchcast embed
1001         {
1002             'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
1003             'info_dict': {
1004                 'id': '7141703',
1005                 'ext': 'mp3',
1006                 'upload_date': '20141126',
1007                 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
1008             }
1009         },
1010         # Cinerama player
1011         {
1012             'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
1013             'info_dict': {
1014                 'id': '730m_DandD_1901_512k',
1015                 'ext': 'mp4',
1016                 'uploader': 'www.abc.net.au',
1017                 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
1018             }
1019         },
1020         # embedded viddler video
1021         {
1022             'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
1023             'info_dict': {
1024                 'id': '4d03aad9',
1025                 'ext': 'mp4',
1026                 'uploader': 'deadspin',
1027                 'title': 'WALL-TO-GORTAT',
1028                 'timestamp': 1422285291,
1029                 'upload_date': '20150126',
1030             },
1031             'add_ie': ['Viddler'],
1032         },
1033         # Libsyn embed
1034         {
1035             'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
1036             'info_dict': {
1037                 'id': '3377616',
1038                 'ext': 'mp3',
1039                 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
1040                 'description': 'md5:601cb790edd05908957dae8aaa866465',
1041                 'upload_date': '20150220',
1042             },
1043             'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
1044         },
1045         # jwplayer YouTube
1046         {
1047             'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
1048             'info_dict': {
1049                 'id': 'Mrj4DVp2zeA',
1050                 'ext': 'mp4',
1051                 'upload_date': '20150212',
1052                 'uploader': 'The National Archives UK',
1053                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
1054                 'uploader_id': 'NationalArchives08',
1055                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
1056             },
1057         },
1058         # jwplayer rtmp
1059         {
1060             'url': 'http://www.suffolk.edu/sjc/',
1061             'info_dict': {
1062                 'id': 'sjclive',
1063                 'ext': 'flv',
1064                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
1065                 'uploader': 'www.suffolk.edu',
1066             },
1067             'params': {
1068                 'skip_download': True,
1069             }
1070         },
1071         # Complex jwplayer
1072         {
1073             'url': 'http://www.indiedb.com/games/king-machine/videos',
1074             'info_dict': {
1075                 'id': 'videos',
1076                 'ext': 'mp4',
1077                 'title': 'king machine trailer 1',
1078                 'thumbnail': r're:^https?://.*\.jpg$',
1079             },
1080         },
1081         {
1082             # JWPlayer config passed as variable
1083             'url': 'http://www.txxx.com/videos/3326530/ariele/',
1084             'info_dict': {
1085                 'id': '3326530_hq',
1086                 'ext': 'mp4',
1087                 'title': 'ARIELE | Tube Cup',
1088                 'uploader': 'www.txxx.com',
1089                 'age_limit': 18,
1090             },
1091             'params': {
1092                 'skip_download': True,
1093             }
1094         },
1095         # rtl.nl embed
1096         {
1097             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
1098             'playlist_mincount': 5,
1099             'info_dict': {
1100                 'id': 'aanslagen-kopenhagen',
1101                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
1102             }
1103         },
1104         # Zapiks embed
1105         {
1106             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
1107             'info_dict': {
1108                 'id': '118046',
1109                 'ext': 'mp4',
1110                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
1111             }
1112         },
1113         # Kaltura embed (different embed code)
1114         {
1115             'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
1116             'info_dict': {
1117                 'id': '1_a52wc67y',
1118                 'ext': 'flv',
1119                 'upload_date': '20150127',
1120                 'uploader_id': 'PremierMedia',
1121                 'timestamp': int,
1122                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
1123             },
1124         },
1125         # Kaltura embed with single quotes
1126         {
1127             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
1128             'info_dict': {
1129                 'id': '0_izeg5utt',
1130                 'ext': 'mp4',
1131                 'title': '35871',
1132                 'timestamp': 1355743100,
1133                 'upload_date': '20121217',
1134                 'uploader_id': 'batchUser',
1135             },
1136             'add_ie': ['Kaltura'],
1137         },
1138         {
1139             # Kaltura embedded via quoted entry_id
1140             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
1141             'info_dict': {
1142                 'id': '0_utuok90b',
1143                 'ext': 'mp4',
1144                 'title': '06_matthew_brender_raj_dutt',
1145                 'timestamp': 1466638791,
1146                 'upload_date': '20160622',
1147             },
1148             'add_ie': ['Kaltura'],
1149             'expected_warnings': [
1150                 'Could not send HEAD request'
1151             ],
1152             'params': {
1153                 'skip_download': True,
1154             }
1155         },
1156         {
1157             # Kaltura embedded, some fileExt broken (#11480)
1158             'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
1159             'info_dict': {
1160                 'id': '1_sgtvehim',
1161                 'ext': 'mp4',
1162                 'title': 'Our "Standard Models" of particle physics and cosmology',
1163                 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
1164                 'timestamp': 1321158993,
1165                 'upload_date': '20111113',
1166                 'uploader_id': 'kps1',
1167             },
1168             'add_ie': ['Kaltura'],
1169         },
1170         {
1171             # Kaltura iframe embed
1172             'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
1173             'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
1174             'info_dict': {
1175                 'id': '0_f2cfbpwy',
1176                 'ext': 'mp4',
1177                 'title': 'I. M. Pei: A Centennial Celebration',
1178                 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
1179                 'upload_date': '20170403',
1180                 'uploader_id': 'batchUser',
1181                 'timestamp': 1491232186,
1182             },
1183             'add_ie': ['Kaltura'],
1184         },
1185         # Eagle.Platform embed (generic URL)
1186         {
1187             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
1188             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1189             'info_dict': {
1190                 'id': '227304',
1191                 'ext': 'mp4',
1192                 'title': 'Навальный вышел на свободу',
1193                 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
1194                 'thumbnail': r're:^https?://.*\.jpg$',
1195                 'duration': 87,
1196                 'view_count': int,
1197                 'age_limit': 0,
1198             },
1199         },
1200         # ClipYou (Eagle.Platform) embed (custom URL)
1201         {
1202             'url': 'http://muz-tv.ru/play/7129/',
1203             # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
1204             'info_dict': {
1205                 'id': '12820',
1206                 'ext': 'mp4',
1207                 'title': "'O Sole Mio",
1208                 'thumbnail': r're:^https?://.*\.jpg$',
1209                 'duration': 216,
1210                 'view_count': int,
1211             },
1212         },
1213         # Pladform embed
1214         {
1215             'url': 'http://muz-tv.ru/kinozal/view/7400/',
1216             'info_dict': {
1217                 'id': '100183293',
1218                 'ext': 'mp4',
1219                 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
1220                 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
1221                 'thumbnail': r're:^https?://.*\.jpg$',
1222                 'duration': 694,
1223                 'age_limit': 0,
1224             },
1225         },
1226         # Playwire embed
1227         {
1228             'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
1229             'info_dict': {
1230                 'id': '3519514',
1231                 'ext': 'mp4',
1232                 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
1233                 'thumbnail': r're:^https?://.*\.png$',
1234                 'duration': 45.115,
1235             },
1236         },
1237         # 5min embed
1238         {
1239             'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
1240             'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
1241             'info_dict': {
1242                 'id': '518726732',
1243                 'ext': 'mp4',
1244                 'title': 'Facebook Creates "On This Day" | Crunch Report',
1245             },
1246         },
1247         # SVT embed
1248         {
1249             'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
1250             'info_dict': {
1251                 'id': '2900353',
1252                 'ext': 'flv',
1253                 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
1254                 'duration': 27,
1255                 'age_limit': 0,
1256             },
1257         },
1258         # Crooks and Liars embed
1259         {
1260             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
1261             'info_dict': {
1262                 'id': '8RUoRhRi',
1263                 'ext': 'mp4',
1264                 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
1265                 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
1266                 'timestamp': 1428207000,
1267                 'upload_date': '20150405',
1268                 'uploader': 'Heather',
1269             },
1270         },
1271         # Crooks and Liars external embed
1272         {
1273             'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
1274             'info_dict': {
1275                 'id': 'MTE3MjUtMzQ2MzA',
1276                 'ext': 'mp4',
1277                 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
1278                 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
1279                 'timestamp': 1265032391,
1280                 'upload_date': '20100201',
1281                 'uploader': 'Heather',
1282             },
1283         },
1284         # NBC Sports vplayer embed
1285         {
1286             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
1287             'info_dict': {
1288                 'id': 'ln7x1qSThw4k',
1289                 'ext': 'flv',
1290                 'title': "PFT Live: New leader in the 'new-look' defense",
1291                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
1292                 'uploader': 'NBCU-SPORTS',
1293                 'upload_date': '20140107',
1294                 'timestamp': 1389118457,
1295             },
1296         },
1297         # NBC News embed
1298         {
1299             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
1300             'md5': '1aa589c675898ae6d37a17913cf68d66',
1301             'info_dict': {
1302                 'id': '701714499682',
1303                 'ext': 'mp4',
1304                 'title': 'PREVIEW: On Assignment: David Letterman',
1305                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
1306             },
1307         },
1308         # UDN embed
1309         {
1310             'url': 'https://video.udn.com/news/300346',
1311             'md5': 'fd2060e988c326991037b9aff9df21a6',
1312             'info_dict': {
1313                 'id': '300346',
1314                 'ext': 'mp4',
1315                 'title': '中一中男師變性 全校師生力挺',
1316                 'thumbnail': r're:^https?://.*\.jpg$',
1317             },
1318             'params': {
1319                 # m3u8 download
1320                 'skip_download': True,
1321             },
1322         },
1323         # Ooyala embed
1324         {
1325             'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
1326             'info_dict': {
1327                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
1328                 'ext': 'mp4',
1329                 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
1330                 'title': 'This is what separates the Excel masters from the wannabes',
1331                 'duration': 191.933,
1332             },
1333             'params': {
1334                 # m3u8 downloads
1335                 'skip_download': True,
1336             }
1337         },
1338         # Brightcove URL in single quotes
1339         {
1340             'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
1341             'md5': '4ae374f1f8b91c889c4b9203c8c752af',
1342             'info_dict': {
1343                 'id': '4255764656001',
1344                 'ext': 'mp4',
1345                 'title': 'SN Presents: Russell Martin, World Citizen',
1346                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
1347                 'uploader': 'Rogers Sportsnet',
1348                 'uploader_id': '1704050871',
1349                 'upload_date': '20150525',
1350                 'timestamp': 1432570283,
1351             },
1352         },
1353         # Dailymotion Cloud video
1354         {
1355             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
1356             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
1357             'info_dict': {
1358                 'id': 'x2uy8t3',
1359                 'ext': 'mp4',
1360                 'title': 'Sauvons les abeilles ! - Le débat',
1361                 'description': 'md5:d9082128b1c5277987825d684939ca26',
1362                 'thumbnail': r're:^https?://.*\.jpe?g$',
1363                 'timestamp': 1434970506,
1364                 'upload_date': '20150622',
1365                 'uploader': 'Public Sénat',
1366                 'uploader_id': 'xa9gza',
1367             }
1368         },
1369         # OnionStudios embed
1370         {
1371             'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
1372             'info_dict': {
1373                 'id': '2855',
1374                 'ext': 'mp4',
1375                 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
1376                 'thumbnail': r're:^https?://.*\.jpe?g$',
1377                 'uploader': 'ClickHole',
1378                 'uploader_id': 'clickhole',
1379             }
1380         },
1381         # SnagFilms embed
1382         {
1383             'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
1384             'info_dict': {
1385                 'id': '74849a00-85a9-11e1-9660-123139220831',
1386                 'ext': 'mp4',
1387                 'title': '#whilewewatch',
1388             }
1389         },
1390         # AdobeTVVideo embed
1391         {
1392             'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
1393             'md5': '43662b577c018ad707a63766462b1e87',
1394             'info_dict': {
1395                 'id': '2456',
1396                 'ext': 'mp4',
1397                 'title': 'New experience with Acrobat DC',
1398                 'description': 'New experience with Acrobat DC',
1399                 'duration': 248.667,
1400             },
1401         },
1402         # BrightcoveInPageEmbed embed
1403         {
1404             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
1405             'info_dict': {
1406                 'id': '4238694884001',
1407                 'ext': 'flv',
1408                 'title': 'Tabletop: Dread, Last Thoughts',
1409                 'description': 'Tabletop: Dread, Last Thoughts',
1410                 'duration': 51690,
1411             },
1412         },
1413         # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
1414         # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
1415         {
1416             'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
1417             'info_dict': {
1418                 'id': '4785848093001',
1419                 'ext': 'mp4',
1420                 'title': 'The Cardinal Pell Interview',
1421                 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
1422                 'uploader': 'GlobeCast Australia - GlobeStream',
1423                 'uploader_id': '2733773828001',
1424                 'upload_date': '20160304',
1425                 'timestamp': 1457083087,
1426             },
1427             'params': {
1428                 # m3u8 downloads
1429                 'skip_download': True,
1430             },
1431         },
1432         {
1433             # Brightcove embed with whitespace around attribute names
1434             'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
1435             'info_dict': {
1436                 'id': '3167554373001',
1437                 'ext': 'mp4',
1438                 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
1439                 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
1440                 'uploader_id': '1079349493',
1441                 'upload_date': '20140207',
1442                 'timestamp': 1391810548,
1443             },
1444             'params': {
1445                 'skip_download': True,
1446             },
1447         },
1448         # Another form of arte.tv embed
1449         {
1450             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
1451             'md5': '850bfe45417ddf221288c88a0cffe2e2',
1452             'info_dict': {
1453                 'id': '030273-562_PLUS7-F',
1454                 'ext': 'mp4',
1455                 'title': 'ARTE Reportage - Nulle part, en France',
1456                 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
1457                 'upload_date': '20160409',
1458             },
1459         },
1460         # LiveLeak embed
1461         {
1462             'url': 'http://www.wykop.pl/link/3088787/',
1463             'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
1464             'info_dict': {
1465                 'id': '874_1459135191',
1466                 'ext': 'mp4',
1467                 'title': 'Man shows poor quality of new apartment building',
1468                 'description': 'The wall is like a sand pile.',
1469                 'uploader': 'Lake8737',
1470             }
1471         },
1472         # Duplicated embedded video URLs
1473         {
1474             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
1475             'info_dict': {
1476                 'id': '149298443_480_16c25b74_2',
1477                 'ext': 'mp4',
1478                 'title': 'vs. Blue Orange Spring Game',
1479                 'uploader': 'www.hudl.com',
1480             },
1481         },
1482         # twitter:player:stream embed
1483         {
1484             'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
1485             'info_dict': {
1486                 'id': 'master',
1487                 'ext': 'mp4',
1488                 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
1489                 'uploader': 'www.rtl.be',
1490             },
1491             'params': {
1492                 # m3u8 downloads
1493                 'skip_download': True,
1494             },
1495         },
1496         # twitter:player embed
1497         {
1498             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
1499             'md5': 'a3e0df96369831de324f0778e126653c',
1500             'info_dict': {
1501                 'id': '4909620399001',
1502                 'ext': 'mp4',
1503                 'title': 'What Do Black Holes Sound Like?',
1504                 'description': 'what do black holes sound like',
1505                 'upload_date': '20160524',
1506                 'uploader_id': '29913724001',
1507                 'timestamp': 1464107587,
1508                 'uploader': 'TheAtlantic',
1509             },
1510             'add_ie': ['BrightcoveLegacy'],
1511         },
1512         # Facebook <iframe> embed
1513         {
1514             'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
1515             'md5': 'fbcde74f534176ecb015849146dd3aee',
1516             'info_dict': {
1517                 'id': '599637780109885',
1518                 'ext': 'mp4',
1519                 'title': 'Facebook video #599637780109885',
1520             },
1521         },
1522         # Facebook API embed
1523         {
1524             'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
1525             'md5': 'a47372ee61b39a7b90287094d447d94e',
1526             'info_dict': {
1527                 'id': '10153467542406923',
1528                 'ext': 'mp4',
1529                 'title': 'Facebook video #10153467542406923',
1530             },
1531         },
1532         # Wordpress "YouTube Video Importer" plugin
1533         {
1534             'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
1535             'md5': 'd16797741b560b485194eddda8121b48',
1536             'info_dict': {
1537                 'id': 'HNTXWDXV9Is',
1538                 'ext': 'mp4',
1539                 'title': 'Blue Devils Drumline Stanford lot 2016',
1540                 'upload_date': '20160627',
1541                 'uploader_id': 'GENOCIDE8GENERAL10',
1542                 'uploader': 'cylus cyrus',
1543             },
1544         },
1545         {
1546             # video stored on custom kaltura server
1547             'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
1548             'md5': '537617d06e64dfed891fa1593c4b30cc',
1549             'info_dict': {
1550                 'id': '0_1iotm5bh',
1551                 'ext': 'mp4',
1552                 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
1553                 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
1554                 'uploader_id': 'videos.expansion@el-mundo.net',
1555                 'upload_date': '20150429',
1556                 'timestamp': 1430303472,
1557             },
1558             'add_ie': ['Kaltura'],
1559         },
1560         {
1561             # Non-standard Vimeo embed
1562             'url': 'https://openclassrooms.com/courses/understanding-the-web',
1563             'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
1564             'info_dict': {
1565                 'id': '148867247',
1566                 'ext': 'mp4',
1567                 'title': 'Understanding the web - Teaser',
1568                 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
1569                 'upload_date': '20151214',
1570                 'uploader': 'OpenClassrooms',
1571                 'uploader_id': 'openclassrooms',
1572             },
1573             'add_ie': ['Vimeo'],
1574         },
1575         {
1576             # generic vimeo embed that requires original URL passed as Referer
1577             'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
1578             'only_matching': True,
1579         },
1580         {
1581             'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
1582             'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
1583             'info_dict': {
1584                 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
1585                 'ext': 'mp4',
1586                 'title': 'Big Buck Bunny',
1587                 'description': 'Royalty free test video',
1588                 'timestamp': 1432816365,
1589                 'upload_date': '20150528',
1590                 'is_live': False,
1591             },
1592             'params': {
1593                 'skip_download': True,
1594             },
1595             'add_ie': [ArkenaIE.ie_key()],
1596         },
1597         {
1598             'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
1599             'info_dict': {
1600                 'id': '1c7141f46c',
1601                 'ext': 'mp4',
1602                 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
1603             },
1604             'params': {
1605                 'skip_download': True,
1606             },
1607             'add_ie': [Vbox7IE.ie_key()],
1608         },
1609         {
1610             # DBTV embeds
1611             'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
1612             'info_dict': {
1613                 'id': '43254897',
1614                 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
1615             },
1616             'playlist_mincount': 3,
1617         },
1618         {
1619             # Videa embeds
1620             'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
1621             'info_dict': {
1622                 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
1623                 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
1624             },
1625             'playlist_mincount': 2,
1626         },
1627         {
1628             # 20 minuten embed
1629             'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
1630             'info_dict': {
1631                 'id': '523629',
1632                 'ext': 'mp4',
1633                 'title': 'So kommen Sie bei Eis und Schnee sicher an',
1634                 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
1635             },
1636             'params': {
1637                 'skip_download': True,
1638             },
1639             'add_ie': [TwentyMinutenIE.ie_key()],
1640         },
1641         {
1642             # VideoPress embed
1643             'url': 'https://en.support.wordpress.com/videopress/',
1644             'info_dict': {
1645                 'id': 'OcobLTqC',
1646                 'ext': 'm4v',
1647                 'title': 'IMG_5786',
1648                 'timestamp': 1435711927,
1649                 'upload_date': '20150701',
1650             },
1651             'params': {
1652                 'skip_download': True,
1653             },
1654             'add_ie': [VideoPressIE.ie_key()],
1655         },
1656         {
1657             # Rutube embed
1658             'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
1659             'info_dict': {
1660                 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
1661                 'ext': 'flv',
1662                 'title': 'Магаззино: Казань 2',
1663                 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
1664                 'uploader': 'Магаззино',
1665                 'upload_date': '20170228',
1666                 'uploader_id': '996642',
1667             },
1668             'params': {
1669                 'skip_download': True,
1670             },
1671             'add_ie': [RutubeIE.ie_key()],
1672         },
1673         {
1674             # ThePlatform embedded with whitespaces in URLs
1675             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
1676             'only_matching': True,
1677         },
1678         {
1679             # Senate ISVP iframe https
1680             'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
1681             'md5': 'fb8c70b0b515e5037981a2492099aab8',
1682             'info_dict': {
1683                 'id': 'govtaff020316',
1684                 'ext': 'mp4',
1685                 'title': 'Integrated Senate Video Player',
1686             },
1687             'add_ie': [SenateISVPIE.ie_key()],
1688         },
1689         {
1690             # Limelight embeds (1 channel embed + 4 media embeds)
1691             'url': 'http://www.sedona.com/FacilitatorTraining2017',
1692             'info_dict': {
1693                 'id': 'FacilitatorTraining2017',
1694                 'title': 'Facilitator Training 2017',
1695             },
1696             'playlist_mincount': 5,
1697         },
1698         {
1699             'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
1700             'info_dict': {
1701                 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
1702                 'title': 'Standoff with Walnut Creek murder suspect ends',
1703                 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
1704             },
1705             'playlist_mincount': 4,
1706         },
1707         {
1708             # WashingtonPost embed
1709             'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
1710             'info_dict': {
1711                 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
1712                 'ext': 'mp4',
1713                 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
1714                 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
1715                 'timestamp': 1455216756,
1716                 'uploader': 'The Washington Post',
1717                 'upload_date': '20160211',
1718             },
1719             'add_ie': [WashingtonPostIE.ie_key()],
1720         },
1721         # {
1722         #     # TODO: find another test
1723         #     # http://schema.org/VideoObject
1724         #     'url': 'https://flipagram.com/f/nyvTSJMKId',
1725         #     'md5': '888dcf08b7ea671381f00fab74692755',
1726         #     'info_dict': {
1727         #         'id': 'nyvTSJMKId',
1728         #         'ext': 'mp4',
1729         #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
1730         #         'description': '#love for cats.',
1731         #         'timestamp': 1461244995,
1732         #         'upload_date': '20160421',
1733         #     },
1734         #     'params': {
1735         #         'force_generic_extractor': True,
1736         #     },
1737         # }
1738     ]
1739
1740     def report_following_redirect(self, new_url):
1741         """Report information extraction."""
1742         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
1743
1744     def _extract_rss(self, url, video_id, doc):
1745         playlist_title = doc.find('./channel/title').text
1746         playlist_desc_el = doc.find('./channel/description')
1747         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
1748
1749         entries = []
1750         for it in doc.findall('./channel/item'):
1751             next_url = xpath_text(it, 'link', fatal=False)
1752             if not next_url:
1753                 enclosure_nodes = it.findall('./enclosure')
1754                 for e in enclosure_nodes:
1755                     next_url = e.attrib.get('url')
1756                     if next_url:
1757                         break
1758
1759             if not next_url:
1760                 continue
1761
1762             entries.append({
1763                 '_type': 'url_transparent',
1764                 'url': next_url,
1765                 'title': it.find('title').text,
1766             })
1767
1768         return {
1769             '_type': 'playlist',
1770             'id': url,
1771             'title': playlist_title,
1772             'description': playlist_desc,
1773             'entries': entries,
1774         }
1775
1776     def _extract_camtasia(self, url, video_id, webpage):
1777         """ Returns None if no camtasia video can be found. """
1778
1779         camtasia_cfg = self._search_regex(
1780             r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
1781             webpage, 'camtasia configuration file', default=None)
1782         if camtasia_cfg is None:
1783             return None
1784
1785         title = self._html_search_meta('DC.title', webpage, fatal=True)
1786
1787         camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
1788         camtasia_cfg = self._download_xml(
1789             camtasia_url, video_id,
1790             note='Downloading camtasia configuration',
1791             errnote='Failed to download camtasia configuration')
1792         fileset_node = camtasia_cfg.find('./playlist/array/fileset')
1793
1794         entries = []
1795         for n in fileset_node.getchildren():
1796             url_n = n.find('./uri')
1797             if url_n is None:
1798                 continue
1799
1800             entries.append({
1801                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
1802                 'title': '%s - %s' % (title, n.tag),
1803                 'url': compat_urlparse.urljoin(url, url_n.text),
1804                 'duration': float_or_none(n.find('./duration').text),
1805             })
1806
1807         return {
1808             '_type': 'playlist',
1809             'entries': entries,
1810             'title': title,
1811         }
1812
1813     def _real_extract(self, url):
1814         if url.startswith('//'):
1815             return {
1816                 '_type': 'url',
1817                 'url': self.http_scheme() + url,
1818             }
1819
1820         parsed_url = compat_urlparse.urlparse(url)
1821         if not parsed_url.scheme:
1822             default_search = self._downloader.params.get('default_search')
1823             if default_search is None:
1824                 default_search = 'fixup_error'
1825
1826             if default_search in ('auto', 'auto_warning', 'fixup_error'):
1827                 if '/' in url:
1828                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
1829                     return self.url_result('http://' + url)
1830                 elif default_search != 'fixup_error':
1831                     if default_search == 'auto_warning':
1832                         if re.match(r'^(?:url|URL)$', url):
1833                             raise ExtractorError(
1834                                 'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
1835                                 expected=True)
1836                         else:
1837                             self._downloader.report_warning(
1838                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
1839                     return self.url_result('ytsearch:' + url)
1840
1841             if default_search in ('error', 'fixup_error'):
1842                 raise ExtractorError(
1843                     '%r is not a valid URL. '
1844                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
1845                     % (url, url), expected=True)
1846             else:
1847                 if ':' not in default_search:
1848                     default_search += ':'
1849                 return self.url_result(default_search + url)
1850
1851         url, smuggled_data = unsmuggle_url(url)
1852         force_videoid = None
1853         is_intentional = smuggled_data and smuggled_data.get('to_generic')
1854         if smuggled_data and 'force_videoid' in smuggled_data:
1855             force_videoid = smuggled_data['force_videoid']
1856             video_id = force_videoid
1857         else:
1858             video_id = self._generic_id(url)
1859
1860         self.to_screen('%s: Requesting header' % video_id)
1861
1862         head_req = HEADRequest(url)
1863         head_response = self._request_webpage(
1864             head_req, video_id,
1865             note=False, errnote='Could not send HEAD request to %s' % url,
1866             fatal=False)
1867
1868         if head_response is not False:
1869             # Check for redirect
1870             new_url = head_response.geturl()
1871             if url != new_url:
1872                 self.report_following_redirect(new_url)
1873                 if force_videoid:
1874                     new_url = smuggle_url(
1875                         new_url, {'force_videoid': force_videoid})
1876                 return self.url_result(new_url)
1877
1878         full_response = None
1879         if head_response is False:
1880             request = sanitized_Request(url)
1881             request.add_header('Accept-Encoding', '*')
1882             full_response = self._request_webpage(request, video_id)
1883             head_response = full_response
1884
1885         info_dict = {
1886             'id': video_id,
1887             'title': self._generic_title(url),
1888             'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
1889         }
1890
1891         # Check for direct link to a video
1892         content_type = head_response.headers.get('Content-Type', '').lower()
1893         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
1894         if m:
1895             format_id = m.group('format_id')
1896             if format_id.endswith('mpegurl'):
1897                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
1898             elif format_id == 'f4m':
1899                 formats = self._extract_f4m_formats(url, video_id)
1900             else:
1901                 formats = [{
1902                     'format_id': m.group('format_id'),
1903                     'url': url,
1904                     'vcodec': 'none' if m.group('type') == 'audio' else None
1905                 }]
1906                 info_dict['direct'] = True
1907             self._sort_formats(formats)
1908             info_dict['formats'] = formats
1909             return info_dict
1910
1911         if not self._downloader.params.get('test', False) and not is_intentional:
1912             force = self._downloader.params.get('force_generic_extractor', False)
1913             self._downloader.report_warning(
1914                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
1915
1916         if not full_response:
1917             request = sanitized_Request(url)
1918             # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
1919             # making it impossible to download only chunk of the file (yet we need only 512kB to
1920             # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
1921             # that will always result in downloading the whole file that is not desirable.
1922             # Therefore for extraction pass we have to override Accept-Encoding to any in order
1923             # to accept raw bytes and being able to download only a chunk.
1924             # It may probably better to solve this by checking Content-Type for application/octet-stream
1925             # after HEAD request finishes, but not sure if we can rely on this.
1926             request.add_header('Accept-Encoding', '*')
1927             full_response = self._request_webpage(request, video_id)
1928
1929         first_bytes = full_response.read(512)
1930
1931         # Is it an M3U playlist?
1932         if first_bytes.startswith(b'#EXTM3U'):
1933             info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
1934             self._sort_formats(info_dict['formats'])
1935             return info_dict
1936
1937         # Maybe it's a direct link to a video?
1938         # Be careful not to download the whole thing!
1939         if not is_html(first_bytes):
1940             self._downloader.report_warning(
1941                 'URL could be a direct video link, returning it as such.')
1942             info_dict.update({
1943                 'direct': True,
1944                 'url': url,
1945             })
1946             return info_dict
1947
1948         webpage = self._webpage_read_content(
1949             full_response, url, video_id, prefix=first_bytes)
1950
1951         self.report_extraction(video_id)
1952
1953         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
1954         try:
1955             doc = compat_etree_fromstring(webpage.encode('utf-8'))
1956             if doc.tag == 'rss':
1957                 return self._extract_rss(url, video_id, doc)
1958             elif doc.tag == 'SmoothStreamingMedia':
1959                 info_dict['formats'] = self._parse_ism_formats(doc, url)
1960                 self._sort_formats(info_dict['formats'])
1961                 return info_dict
1962             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
1963                 smil = self._parse_smil(doc, url, video_id)
1964                 self._sort_formats(smil['formats'])
1965                 return smil
1966             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
1967                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
1968             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
1969                 info_dict['formats'] = self._parse_mpd_formats(
1970                     doc, video_id,
1971                     mpd_base_url=full_response.geturl().rpartition('/')[0],
1972                     mpd_url=url)
1973                 self._sort_formats(info_dict['formats'])
1974                 return info_dict
1975             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
1976                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
1977                 self._sort_formats(info_dict['formats'])
1978                 return info_dict
1979         except compat_xml_parse_error:
1980             pass
1981
1982         # Is it a Camtasia project?
1983         camtasia_res = self._extract_camtasia(url, video_id, webpage)
1984         if camtasia_res is not None:
1985             return camtasia_res
1986
1987         # Sometimes embedded video player is hidden behind percent encoding
1988         # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1989         # Unescaping the whole page allows to handle those cases in a generic way
1990         webpage = compat_urllib_parse_unquote(webpage)
1991
1992         # it's tempting to parse this further, but you would
1993         # have to take into account all the variations like
1994         #   Video Title - Site Name
1995         #   Site Name | Video Title
1996         #   Video Title - Tagline | Site Name
1997         # and so on and so forth; it's just not practical
1998         video_title = self._og_search_title(
1999             webpage, default=None) or self._html_search_regex(
2000             r'(?s)<title>(.*?)</title>', webpage, 'video title',
2001             default='video')
2002
2003         # Try to detect age limit automatically
2004         age_limit = self._rta_search(webpage)
2005         # And then there are the jokers who advertise that they use RTA,
2006         # but actually don't.
2007         AGE_LIMIT_MARKERS = [
2008             r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
2009         ]
2010         if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
2011             age_limit = 18
2012
2013         # video uploader is domain name
2014         video_uploader = self._search_regex(
2015             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
2016
2017         video_description = self._og_search_description(webpage, default=None)
2018         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
2019
2020         # Look for Brightcove Legacy Studio embeds
2021         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
2022         if bc_urls:
2023             entries = [{
2024                 '_type': 'url',
2025                 'url': smuggle_url(bc_url, {'Referer': url}),
2026                 'ie_key': 'BrightcoveLegacy'
2027             } for bc_url in bc_urls]
2028
2029             return {
2030                 '_type': 'playlist',
2031                 'title': video_title,
2032                 'id': video_id,
2033                 'entries': entries,
2034             }
2035
2036         # Look for Brightcove New Studio embeds
2037         bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
2038         if bc_urls:
2039             return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
2040
2041         # Look for ThePlatform embeds
2042         tp_urls = ThePlatformIE._extract_urls(webpage)
2043         if tp_urls:
2044             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
2045
2046         # Look for Vessel embeds
2047         vessel_urls = VesselIE._extract_urls(webpage)
2048         if vessel_urls:
2049             return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
2050
2051         # Look for embedded rtl.nl player
2052         matches = re.findall(
2053             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
2054             webpage)
2055         if matches:
2056             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
2057
2058         vimeo_urls = VimeoIE._extract_urls(url, webpage)
2059         if vimeo_urls:
2060             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
2061
2062         vid_me_embed_url = self._search_regex(
2063             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
2064             webpage, 'vid.me embed', default=None)
2065         if vid_me_embed_url is not None:
2066             return self.url_result(vid_me_embed_url, 'Vidme')
2067
2068         # Look for embedded YouTube player
2069         matches = re.findall(r'''(?x)
2070             (?:
2071                 <iframe[^>]+?src=|
2072                 data-video-url=|
2073                 <embed[^>]+?src=|
2074                 embedSWF\(?:\s*|
2075                 <object[^>]+data=|
2076                 new\s+SWFObject\(
2077             )
2078             (["\'])
2079                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2080                 (?:embed|v|p)/.+?)
2081             \1''', webpage)
2082         if matches:
2083             return self.playlist_from_matches(
2084                 matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
2085
2086         # Look for lazyYT YouTube embed
2087         matches = re.findall(
2088             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
2089         if matches:
2090             return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
2091
2092         # Look for Wordpress "YouTube Video Importer" plugin
2093         matches = re.findall(r'''(?x)<div[^>]+
2094             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2095             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2096         if matches:
2097             return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
2098
2099         matches = DailymotionIE._extract_urls(webpage)
2100         if matches:
2101             return self.playlist_from_matches(matches, video_id, video_title)
2102
2103         # Look for embedded Dailymotion playlist player (#3822)
2104         m = re.search(
2105             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
2106         if m:
2107             playlists = re.findall(
2108                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
2109             if playlists:
2110                 return self.playlist_from_matches(
2111                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
2112
2113         # Look for embedded Wistia player
2114         match = re.search(
2115             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
2116         if match:
2117             embed_url = self._proto_relative_url(
2118                 unescapeHTML(match.group('url')))
2119             return {
2120                 '_type': 'url_transparent',
2121                 'url': embed_url,
2122                 'ie_key': 'Wistia',
2123                 'uploader': video_uploader,
2124             }
2125
2126         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
2127         if match:
2128             return {
2129                 '_type': 'url_transparent',
2130                 'url': 'wistia:%s' % match.group('id'),
2131                 'ie_key': 'Wistia',
2132                 'uploader': video_uploader,
2133             }
2134
2135         match = re.search(
2136             r'''(?sx)
2137                 <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
2138                 <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
2139             ''', webpage)
2140         if match:
2141             return self.url_result(self._proto_relative_url(
2142                 'wistia:%s' % match.group('id')), 'Wistia')
2143
2144         # Look for SVT player
2145         svt_url = SVTIE._extract_url(webpage)
2146         if svt_url:
2147             return self.url_result(svt_url, 'SVT')
2148
2149         # Look for Bandcamp pages with custom domain
2150         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
2151         if mobj is not None:
2152             burl = unescapeHTML(mobj.group(1))
2153             # Don't set the extractor because it can be a track url or an album
2154             return self.url_result(burl)
2155
2156         # Look for embedded Vevo player
2157         mobj = re.search(
2158             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
2159         if mobj is not None:
2160             return self.url_result(mobj.group('url'))
2161
2162         # Look for embedded Viddler player
2163         mobj = re.search(
2164             r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
2165             webpage)
2166         if mobj is not None:
2167             return self.url_result(mobj.group('url'))
2168
2169         # Look for NYTimes player
2170         mobj = re.search(
2171             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
2172             webpage)
2173         if mobj is not None:
2174             return self.url_result(mobj.group('url'))
2175
2176         # Look for Libsyn player
2177         mobj = re.search(
2178             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
2179         if mobj is not None:
2180             return self.url_result(mobj.group('url'))
2181
2182         # Look for Ooyala videos
2183         mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
2184                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
2185                 re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
2186                 re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
2187         if mobj is not None:
2188             embed_token = self._search_regex(
2189                 r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
2190                 webpage, 'ooyala embed token', default=None)
2191             return OoyalaIE._build_url_result(smuggle_url(
2192                 mobj.group('ec'), {
2193                     'domain': url,
2194                     'embed_token': embed_token,
2195                 }))
2196
2197         # Look for multiple Ooyala embeds on SBN network websites
2198         mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
2199         if mobj is not None:
2200             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
2201             if embeds:
2202                 return self.playlist_from_matches(
2203                     embeds, video_id, video_title,
2204                     getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
2205
2206         # Look for Aparat videos
2207         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
2208         if mobj is not None:
2209             return self.url_result(mobj.group(1), 'Aparat')
2210
2211         # Look for MPORA videos
2212         mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
2213         if mobj is not None:
2214             return self.url_result(mobj.group(1), 'Mpora')
2215
2216         # Look for embedded NovaMov-based player
2217         mobj = re.search(
2218             r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
2219                     (?P<url>http://(?:(?:embed|www)\.)?
2220                         (?:novamov\.com|
2221                            nowvideo\.(?:ch|sx|eu|at|ag|co)|
2222                            videoweed\.(?:es|com)|
2223                            movshare\.(?:net|sx|ag)|
2224                            divxstage\.(?:eu|net|ch|co|at|ag))
2225                         /embed\.php.+?)\1''', webpage)
2226         if mobj is not None:
2227             return self.url_result(mobj.group('url'))
2228
2229         # Look for embedded Facebook player
2230         facebook_url = FacebookIE._extract_url(webpage)
2231         if facebook_url is not None:
2232             return self.url_result(facebook_url, 'Facebook')
2233
2234         # Look for embedded VK player
2235         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
2236         if mobj is not None:
2237             return self.url_result(mobj.group('url'), 'VK')
2238
2239         # Look for embedded Odnoklassniki player
2240         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
2241         if mobj is not None:
2242             return self.url_result(mobj.group('url'), 'Odnoklassniki')
2243
2244         # Look for embedded ivi player
2245         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
2246         if mobj is not None:
2247             return self.url_result(mobj.group('url'), 'Ivi')
2248
2249         # Look for embedded Huffington Post player
2250         mobj = re.search(
2251             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
2252         if mobj is not None:
2253             return self.url_result(mobj.group('url'), 'HuffPost')
2254
2255         # Look for embed.ly
2256         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
2257         if mobj is not None:
2258             return self.url_result(mobj.group('url'))
2259         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
2260         if mobj is not None:
2261             return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
2262
2263         # Look for funnyordie embed
2264         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
2265         if matches:
2266             return self.playlist_from_matches(
2267                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
2268
2269         # Look for BBC iPlayer embed
2270         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
2271         if matches:
2272             return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
2273
2274         # Look for embedded RUTV player
2275         rutv_url = RUTVIE._extract_url(webpage)
2276         if rutv_url:
2277             return self.url_result(rutv_url, 'RUTV')
2278
2279         # Look for embedded TVC player
2280         tvc_url = TVCIE._extract_url(webpage)
2281         if tvc_url:
2282             return self.url_result(tvc_url, 'TVC')
2283
2284         # Look for embedded SportBox player
2285         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
2286         if sportbox_urls:
2287             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
2288
2289         # Look for embedded XHamster player
2290         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
2291         if xhamster_urls:
2292             return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
2293
2294         # Look for embedded TNAFlixNetwork player
2295         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
2296         if tnaflix_urls:
2297             return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
2298
2299         # Look for embedded PornHub player
2300         pornhub_urls = PornHubIE._extract_urls(webpage)
2301         if pornhub_urls:
2302             return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
2303
2304         # Look for embedded DrTuber player
2305         drtuber_urls = DrTuberIE._extract_urls(webpage)
2306         if drtuber_urls:
2307             return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
2308
2309         # Look for embedded RedTube player
2310         redtube_urls = RedTubeIE._extract_urls(webpage)
2311         if redtube_urls:
2312             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
2313
2314         # Look for embedded Tvigle player
2315         mobj = re.search(
2316             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
2317         if mobj is not None:
2318             return self.url_result(mobj.group('url'), 'Tvigle')
2319
2320         # Look for embedded TED player
2321         mobj = re.search(
2322             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
2323         if mobj is not None:
2324             return self.url_result(mobj.group('url'), 'TED')
2325
2326         # Look for embedded Ustream videos
2327         ustream_url = UstreamIE._extract_url(webpage)
2328         if ustream_url:
2329             return self.url_result(ustream_url, UstreamIE.ie_key())
2330
2331         # Look for embedded arte.tv player
2332         mobj = re.search(
2333             r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
2334             webpage)
2335         if mobj is not None:
2336             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
2337
2338         # Look for embedded francetv player
2339         mobj = re.search(
2340             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
2341             webpage)
2342         if mobj is not None:
2343             return self.url_result(mobj.group('url'))
2344
2345         # Look for embedded smotri.com player
2346         smotri_url = SmotriIE._extract_url(webpage)
2347         if smotri_url:
2348             return self.url_result(smotri_url, 'Smotri')
2349
2350         # Look for embedded Myvi.ru player
2351         myvi_url = MyviIE._extract_url(webpage)
2352         if myvi_url:
2353             return self.url_result(myvi_url)
2354
2355         # Look for embedded soundcloud player
2356         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
2357         if soundcloud_urls:
2358             return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
2359
2360         # Look for tunein player
2361         tunein_urls = TuneInBaseIE._extract_urls(webpage)
2362         if tunein_urls:
2363             return self.playlist_from_matches(tunein_urls, video_id, video_title)
2364
2365         # Look for embedded mtvservices player
2366         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
2367         if mtvservices_url:
2368             return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
2369
2370         # Look for embedded yahoo player
2371         mobj = re.search(
2372             r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
2373             webpage)
2374         if mobj is not None:
2375             return self.url_result(mobj.group('url'), 'Yahoo')
2376
2377         # Look for embedded sbs.com.au player
2378         mobj = re.search(
2379             r'''(?x)
2380             (?:
2381                 <meta\s+property="og:video"\s+content=|
2382                 <iframe[^>]+?src=
2383             )
2384             (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
2385             webpage)
2386         if mobj is not None:
2387             return self.url_result(mobj.group('url'), 'SBS')
2388
2389         # Look for embedded Cinchcast player
2390         mobj = re.search(
2391             r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
2392             webpage)
2393         if mobj is not None:
2394             return self.url_result(mobj.group('url'), 'Cinchcast')
2395
2396         mobj = re.search(
2397             r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
2398             webpage)
2399         if not mobj:
2400             mobj = re.search(
2401                 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
2402                 webpage)
2403         if mobj is not None:
2404             return self.url_result(mobj.group('url'), 'MLB')
2405
2406         mobj = re.search(
2407             r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
2408             webpage)
2409         if mobj is not None:
2410             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
2411
2412         mobj = re.search(
2413             r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
2414             webpage)
2415         if mobj is not None:
2416             return self.url_result(mobj.group('url'), 'Livestream')
2417
2418         # Look for Zapiks embed
2419         mobj = re.search(
2420             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
2421         if mobj is not None:
2422             return self.url_result(mobj.group('url'), 'Zapiks')
2423
2424         # Look for Kaltura embeds
2425         kaltura_url = KalturaIE._extract_url(webpage)
2426         if kaltura_url:
2427             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
2428
2429         # Look for Eagle.Platform embeds
2430         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
2431         if eagleplatform_url:
2432             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
2433
2434         # Look for ClipYou (uses Eagle.Platform) embeds
2435         mobj = re.search(
2436             r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
2437         if mobj is not None:
2438             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
2439
2440         # Look for Pladform embeds
2441         pladform_url = PladformIE._extract_url(webpage)
2442         if pladform_url:
2443             return self.url_result(pladform_url)
2444
2445         # Look for Videomore embeds
2446         videomore_url = VideomoreIE._extract_url(webpage)
2447         if videomore_url:
2448             return self.url_result(videomore_url)
2449
2450         # Look for Webcaster embeds
2451         webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
2452         if webcaster_url:
2453             return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
2454
2455         # Look for Playwire embeds
2456         mobj = re.search(
2457             r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
2458         if mobj is not None:
2459             return self.url_result(mobj.group('url'))
2460
2461         # Look for 5min embeds
2462         mobj = re.search(
2463             r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
2464         if mobj is not None:
2465             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
2466
2467         # Look for Crooks and Liars embeds
2468         mobj = re.search(
2469             r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
2470         if mobj is not None:
2471             return self.url_result(mobj.group('url'))
2472
2473         # Look for NBC Sports VPlayer embeds
2474         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
2475         if nbc_sports_url:
2476             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
2477
2478         # Look for NBC News embeds
2479         nbc_news_embed_url = re.search(
2480             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
2481         if nbc_news_embed_url:
2482             return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
2483
2484         # Look for Google Drive embeds
2485         google_drive_url = GoogleDriveIE._extract_url(webpage)
2486         if google_drive_url:
2487             return self.url_result(google_drive_url, 'GoogleDrive')
2488
2489         # Look for UDN embeds
2490         mobj = re.search(
2491             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
2492         if mobj is not None:
2493             return self.url_result(
2494                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
2495
2496         # Look for Senate ISVP iframe
2497         senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
2498         if senate_isvp_url:
2499             return self.url_result(senate_isvp_url, 'SenateISVP')
2500
2501         # Look for Dailymotion Cloud videos
2502         dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
2503         if dmcloud_url:
2504             return self.url_result(dmcloud_url, 'DailymotionCloud')
2505
2506         # Look for OnionStudios embeds
2507         onionstudios_url = OnionStudiosIE._extract_url(webpage)
2508         if onionstudios_url:
2509             return self.url_result(onionstudios_url)
2510
2511         # Look for ViewLift embeds
2512         viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
2513         if viewlift_url:
2514             return self.url_result(viewlift_url)
2515
2516         # Look for JWPlatform embeds
2517         jwplatform_url = JWPlatformIE._extract_url(webpage)
2518         if jwplatform_url:
2519             return self.url_result(jwplatform_url, 'JWPlatform')
2520
2521         # Look for Digiteka embeds
2522         digiteka_url = DigitekaIE._extract_url(webpage)
2523         if digiteka_url:
2524             return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
2525
2526         # Look for Arkena embeds
2527         arkena_url = ArkenaIE._extract_url(webpage)
2528         if arkena_url:
2529             return self.url_result(arkena_url, ArkenaIE.ie_key())
2530
2531         # Look for Piksel embeds
2532         piksel_url = PikselIE._extract_url(webpage)
2533         if piksel_url:
2534             return self.url_result(piksel_url, PikselIE.ie_key())
2535
2536         # Look for Limelight embeds
2537         limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
2538         if limelight_urls:
2539             return self.playlist_result(
2540                 limelight_urls, video_id, video_title, video_description)
2541
2542         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
2543         if mobj:
2544             lm = {
2545                 'Media': 'media',
2546                 'Channel': 'channel',
2547                 'ChannelList': 'channel_list',
2548             }
2549             return self.url_result(smuggle_url('limelight:%s:%s' % (
2550                 lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
2551                 'Limelight%s' % mobj.group(1), mobj.group(2))
2552
2553         mobj = re.search(
2554             r'''(?sx)
2555                 <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
2556                     <param[^>]+
2557                         name=(["\'])flashVars\2[^>]+
2558                         value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
2559             ''', webpage)
2560         if mobj:
2561             return self.url_result(smuggle_url(
2562                 'limelight:media:%s' % mobj.group('id'),
2563                 {'source_url': url}), 'LimelightMedia', mobj.group('id'))
2564
2565         # Look for Anvato embeds
2566         anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
2567         if anvato_urls:
2568             return self.playlist_result(
2569                 anvato_urls, video_id, video_title, video_description)
2570
2571         # Look for AdobeTVVideo embeds
2572         mobj = re.search(
2573             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
2574             webpage)
2575         if mobj is not None:
2576             return self.url_result(
2577                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
2578                 'AdobeTVVideo')
2579
2580         # Look for Vine embeds
2581         mobj = re.search(
2582             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
2583             webpage)
2584         if mobj is not None:
2585             return self.url_result(
2586                 self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
2587
2588         # Look for VODPlatform embeds
2589         mobj = re.search(
2590             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
2591             webpage)
2592         if mobj is not None:
2593             return self.url_result(
2594                 self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
2595
2596         # Look for Mangomolo embeds
2597         mobj = re.search(
2598             r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
2599                 (?:
2600                     video\?.*?\bid=(?P<video_id>\d+)|
2601                     index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
2602                 ).+?)\1''', webpage)
2603         if mobj is not None:
2604             info = {
2605                 '_type': 'url_transparent',
2606                 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
2607                 'title': video_title,
2608                 'description': video_description,
2609                 'thumbnail': video_thumbnail,
2610                 'uploader': video_uploader,
2611             }
2612             video_id = mobj.group('video_id')
2613             if video_id:
2614                 info.update({
2615                     'ie_key': 'MangomoloVideo',
2616                     'id': video_id,
2617                 })
2618             else:
2619                 info.update({
2620                     'ie_key': 'MangomoloLive',
2621                     'id': mobj.group('channel_id'),
2622                 })
2623             return info
2624
2625         # Look for Instagram embeds
2626         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
2627         if instagram_embed_url is not None:
2628             return self.url_result(
2629                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
2630
2631         # Look for LiveLeak embeds
2632         liveleak_url = LiveLeakIE._extract_url(webpage)
2633         if liveleak_url:
2634             return self.url_result(liveleak_url, 'LiveLeak')
2635
2636         # Look for 3Q SDN embeds
2637         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
2638         if threeqsdn_url:
2639             return {
2640                 '_type': 'url_transparent',
2641                 'ie_key': ThreeQSDNIE.ie_key(),
2642                 'url': self._proto_relative_url(threeqsdn_url),
2643                 'title': video_title,
2644                 'description': video_description,
2645                 'thumbnail': video_thumbnail,
2646                 'uploader': video_uploader,
2647             }
2648
2649         # Look for VBOX7 embeds
2650         vbox7_url = Vbox7IE._extract_url(webpage)
2651         if vbox7_url:
2652             return self.url_result(vbox7_url, Vbox7IE.ie_key())
2653
2654         # Look for DBTV embeds
2655         dbtv_urls = DBTVIE._extract_urls(webpage)
2656         if dbtv_urls:
2657             return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
2658
2659         # Look for Videa embeds
2660         videa_urls = VideaIE._extract_urls(webpage)
2661         if videa_urls:
2662             return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
2663
2664         # Look for 20 minuten embeds
2665         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
2666         if twentymin_urls:
2667             return self.playlist_from_matches(
2668                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
2669
2670         # Look for Openload embeds
2671         openload_urls = OpenloadIE._extract_urls(webpage)
2672         if openload_urls:
2673             return self.playlist_from_matches(
2674                 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
2675
2676         # Look for VideoPress embeds
2677         videopress_urls = VideoPressIE._extract_urls(webpage)
2678         if videopress_urls:
2679             return self.playlist_from_matches(
2680                 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
2681
2682         # Look for Rutube embeds
2683         rutube_urls = RutubeIE._extract_urls(webpage)
2684         if rutube_urls:
2685             return self.playlist_from_matches(
2686                 rutube_urls, ie=RutubeIE.ie_key())
2687
2688         # Look for WashingtonPost embeds
2689         wapo_urls = WashingtonPostIE._extract_urls(webpage)
2690         if wapo_urls:
2691             return self.playlist_from_matches(
2692                 wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
2693
2694         # Looking for http://schema.org/VideoObject
2695         json_ld = self._search_json_ld(
2696             webpage, video_id, default={}, expected_type='VideoObject')
2697         if json_ld.get('url'):
2698             info_dict.update({
2699                 'title': video_title or info_dict['title'],
2700                 'description': video_description,
2701                 'thumbnail': video_thumbnail,
2702                 'age_limit': age_limit
2703             })
2704             info_dict.update(json_ld)
2705             return info_dict
2706
2707         # Look for HTML5 media
2708         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
2709         if entries:
2710             for entry in entries:
2711                 entry.update({
2712                     'id': video_id,
2713                     'title': video_title,
2714                 })
2715                 self._sort_formats(entry['formats'])
2716             return self.playlist_result(entries)
2717
2718         jwplayer_data = self._find_jwplayer_data(
2719             webpage, video_id, transform_source=js_to_json)
2720         if jwplayer_data:
2721             info = self._parse_jwplayer_data(
2722                 jwplayer_data, video_id, require_title=False, base_url=url)
2723             if not info.get('title'):
2724                 info['title'] = video_title
2725             return info
2726
2727         def check_video(vurl):
2728             if YoutubeIE.suitable(vurl):
2729                 return True
2730             if RtmpIE.suitable(vurl):
2731                 return True
2732             vpath = compat_urlparse.urlparse(vurl).path
2733             vext = determine_ext(vpath)
2734             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
2735
2736         def filter_video(urls):
2737             return list(filter(check_video, urls))
2738
2739         # Start with something easy: JW Player in SWFObject
2740         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
2741         if not found:
2742             # Look for gorilla-vid style embedding
2743             found = filter_video(re.findall(r'''(?sx)
2744                 (?:
2745                     jw_plugins|
2746                     JWPlayerOptions|
2747                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
2748                 )
2749                 .*?
2750                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
2751         if not found:
2752             # Broaden the search a little bit
2753             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
2754         if not found:
2755             # Broaden the findall a little bit: JWPlayer JS loader
2756             found = filter_video(re.findall(
2757                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
2758         if not found:
2759             # Flow player
2760             found = filter_video(re.findall(r'''(?xs)
2761                 flowplayer\("[^"]+",\s*
2762                     \{[^}]+?\}\s*,
2763                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
2764                         ["']?url["']?\s*:\s*["']([^"']+)["']
2765             ''', webpage))
2766         if not found:
2767             # Cinerama player
2768             found = re.findall(
2769                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
2770         if not found:
2771             # Try to find twitter cards info
2772             # twitter:player:stream should be checked before twitter:player since
2773             # it is expected to contain a raw stream (see
2774             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2775             found = filter_video(re.findall(
2776                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
2777         if not found:
2778             # We look for Open Graph info:
2779             # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
2780             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
2781             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
2782             if m_video_type is not None:
2783                 found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
2784         if not found:
2785             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
2786             found = re.search(
2787                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
2788                 r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
2789                 webpage)
2790             if not found:
2791                 # Look also in Refresh HTTP header
2792                 refresh_header = head_response.headers.get('Refresh')
2793                 if refresh_header:
2794                     # In python 2 response HTTP headers are bytestrings
2795                     if sys.version_info < (3, 0) and isinstance(refresh_header, str):
2796                         refresh_header = refresh_header.decode('iso-8859-1')
2797                     found = re.search(REDIRECT_REGEX, refresh_header)
2798             if found:
2799                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
2800                 if new_url != url:
2801                     self.report_following_redirect(new_url)
2802                     return {
2803                         '_type': 'url',
2804                         'url': new_url,
2805                     }
2806                 else:
2807                     found = None
2808
2809         if not found:
2810             # twitter:player is an https URL to an iframe player that may or may not
2811             # be supported by youtube-dl, thus this is checked last (see
2812             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
2813             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
2814             if embed_url:
2815                 return self.url_result(embed_url)
2816
2817         if not found:
2818             raise UnsupportedError(url)
2819
2820         entries = []
2821         for video_url in orderedSet(found):
2822             video_url = unescapeHTML(video_url)
2823             video_url = video_url.replace('\\/', '/')
2824             video_url = compat_urlparse.urljoin(url, video_url)
2825             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
2826
2827             # Sometimes, jwplayer extraction will result in a YouTube URL
2828             if YoutubeIE.suitable(video_url):
2829                 entries.append(self.url_result(video_url, 'Youtube'))
2830                 continue
2831
2832             # Strip the file extension from the URL basename to get the video id
2833             video_id = os.path.splitext(video_id)[0]
2834
2835             entry_info_dict = {
2836                 'id': video_id,
2837                 'uploader': video_uploader,
2838                 'title': video_title,
2839                 'age_limit': age_limit,
2840             }
2841
2842             if RtmpIE.suitable(video_url):
2843                 entry_info_dict.update({
2844                     '_type': 'url_transparent',
2845                     'ie_key': RtmpIE.ie_key(),
2846                     'url': video_url,
2847                 })
2848                 entries.append(entry_info_dict)
2849                 continue
2850
2851             ext = determine_ext(video_url)
2852             if ext == 'smil':
2853                 entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
2854             elif ext == 'xspf':
2855                 return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
2856             elif ext == 'm3u8':
2857                 entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
2858             elif ext == 'mpd':
2859                 entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
2860             elif ext == 'f4m':
2861                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
2862             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
2863                 # Just matching .ism/manifest is not enough to be reliably sure
2864                 # whether it's actually an ISM manifest or some other streaming
2865                 # manifest since there are various streaming URL formats
2866                 # possible (see [1]) as well as some other shenanigans like
2867                 # .smil/manifest URLs that actually serve an ISM (see [2]) and
2868                 # so on.
2869                 # Thus the most reasonable way to solve this is to delegate
2870                 # to generic extractor in order to look into the contents of
2871                 # the manifest itself.
2872                 # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
2873                 # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
2874                 entry_info_dict = self.url_result(
2875                     smuggle_url(video_url, {'to_generic': True}),
2876                     GenericIE.ie_key())
2877             else:
2878                 entry_info_dict['url'] = video_url
2879
2880             if entry_info_dict.get('formats'):
2881                 self._sort_formats(entry_info_dict['formats'])
2882
2883             entries.append(entry_info_dict)
2884
2885         if len(entries) == 1:
2886             return entries[0]
2887         else:
2888             for num, e in enumerate(entries, start=1):
2889                 # 'url' results don't have a title
2890                 if e.get('title') is not None:
2891                     e['title'] = '%s (%d)' % (e['title'], num)
2892             return {
2893                 '_type': 'playlist',
2894                 'entries': entries,
2895             }