import os
import re
+import sys
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import (
+ compat_etree_fromstring,
compat_urllib_parse_unquote,
compat_urllib_request,
compat_urlparse,
HEADRequest,
is_html,
orderedSet,
- parse_xml,
smuggle_url,
unescapeHTML,
unified_strdate,
url_basename,
xpath_text,
)
-from .brightcove import BrightcoveIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
+from .mtv import MTVServicesEmbeddedIE
class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'Automatics, robotics and biocybernetics',
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'upload_date': '20130627',
'formats': 'mincount:16',
'subtitles': 'mincount:1',
},
'skip_download': False,
}
},
+ {
+ # redirect in Refresh HTTP header
+ 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+ 'info_dict': {
+ 'id': 'pO8h3EaFRdo',
+ 'ext': 'mp4',
+ 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+ 'upload_date': '20150917',
+ 'uploader_id': 'brtvofficial',
+ 'uploader': 'Boiler Room',
+ },
+ 'params': {
+ 'skip_download': False,
+ },
+ },
{
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
'id': '2765128793001',
'uploader': 'thestar.com',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
},
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
},
{
'url': 'http://www.championat.com/video/football/v/87/87499.html',
},
{
# https://github.com/rg3/youtube-dl/issues/3541
- 'add_ie': ['Brightcove'],
+ 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
'id': '3866516442001',
'ext': 'mp4',
'title': 'cinemasnob',
},
+ },
+ # BrightcoveInPageEmbed embed
+ {
+ 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ 'info_dict': {
+ 'id': '4238694884001',
+ 'ext': 'flv',
+ 'title': 'Tabletop: Dread, Last Thoughts',
+ 'description': 'Tabletop: Dread, Last Thoughts',
+ 'duration': 51690,
+ },
}
]
# Is it an RSS feed, a SMIL file or an XSPF playlist?
try:
- doc = parse_xml(webpage)
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
- # Look for BrightCove:
- bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+ # Look for Brightcove Legacy Studio embeds
+ bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
self.to_screen('Brightcove video detected.')
entries = [{
'_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
- 'ie_key': 'Brightcove'
+ 'ie_key': 'BrightcoveLegacy'
} for bc_url in bc_urls]
return {
'entries': entries,
}
+ # Look for Brightcove New Studio embeds
+ bc_urls = BrightcoveNewIE._extract_urls(webpage)
+ if bc_urls:
+ return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
return self.url_result(url, ie='Vulture')
# Look for embedded mtvservices player
- mobj = re.search(
- r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
- webpage)
- if mobj is not None:
- url = unescapeHTML(mobj.group('url'))
- return self.url_result(url, ie='MTVServicesEmbedded')
+ mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
+ if mtvservices_url:
+ return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
# Look for embedded yahoo player
mobj = re.search(
return self.url_result(mobj.group('url'), 'MLB')
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
webpage)
if mobj is not None:
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
- mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
- re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
+ mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
+ re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
# Look also in Refresh HTTP header
refresh_header = head_response.headers.get('Refresh')
if refresh_header:
+ # In Python 2, HTTP response headers are bytestrings
+ if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+ refresh_header = refresh_header.decode('iso-8859-1')
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))