X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcommon.py;h=c2aa3a7495d2591ac6b2a50dfd19702a4a633283;hb=4de6131090e0232c7cc99bcaafe6a3e71269b7af;hp=f731703fb25c5f13d517808585b4826ec84fc236;hpb=f738dd7b7c7aefe4d26a65905dee9567a691d262;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f731703fb..c2aa3a749 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -39,6 +39,7 @@ from ..utils import (
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
+ unified_strdate,
url_basename,
xpath_text,
xpath_with_ns,
@@ -152,6 +153,7 @@ class InfoExtractor(object):
description: Full video description.
uploader: Full name of the video uploader.
creator: The main artist who created the video.
+ release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
@@ -510,6 +512,18 @@ class InfoExtractor(object):
"""Report attempt to log in."""
self.to_screen('Logging in')
+ @staticmethod
+ def raise_login_required(msg='This video is only available for registered users'):
+ raise ExtractorError(
+ '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+ expected=True)
+
+ @staticmethod
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
+ raise ExtractorError(
+ '%s. You might want to use --proxy to workaround.' % msg,
+ expected=True)
+
# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None, video_title=None):
@@ -725,9 +739,10 @@ class InfoExtractor(object):
@staticmethod
def _hidden_inputs(html):
+ html = re.sub(r'', '', html)
hidden_inputs = {}
- for input in re.findall(r']+)>', html):
- if not re.search(r'type=(["\'])hidden\1', input):
+ for input in re.findall(r'(?i)]+)>', html):
+ if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
continue
name = re.search(r'name=(["\'])(?P.+?)\1', input)
if not name:
@@ -740,7 +755,7 @@ class InfoExtractor(object):
def _form_hidden_inputs(self, form_id, html):
form = self._search_regex(
- r'(?s)' % form_id,
+ r'(?is)' % form_id,
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)
@@ -855,13 +870,18 @@ class InfoExtractor(object):
time.sleep(timeout)
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
- transform_source=lambda s: fix_xml_ampersands(s).strip()):
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True):
manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest',
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
- transform_source=transform_source)
+ transform_source=transform_source,
+ fatal=fatal)
+
+ if manifest is False:
+ return manifest
formats = []
manifest_version = '1.0'
@@ -882,7 +902,10 @@ class InfoExtractor(object):
# may differ leading to inability to resolve the format by requested
# bitrate in f4m downloader
if determine_ext(manifest_url) == 'f4m':
- formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
+ f4m_formats = self._extract_f4m_formats(
+ manifest_url, video_id, preference, f4m_id, fatal=fatal)
+ if f4m_formats:
+ formats.extend(f4m_formats)
continue
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
@@ -1030,6 +1053,7 @@ class InfoExtractor(object):
video_id = os.path.splitext(url_basename(smil_url))[0]
title = None
description = None
+ upload_date = None
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
name = meta.attrib.get('name')
content = meta.attrib.get('content')
@@ -1039,11 +1063,22 @@ class InfoExtractor(object):
title = content
elif not description and name in ('description', 'abstract'):
description = content
+ elif not upload_date and name == 'date':
+ upload_date = unified_strdate(content)
+
+ thumbnails = [{
+ 'id': image.get('type'),
+ 'url': image.get('src'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
return {
'id': video_id,
'title': title or video_id,
'description': description,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
'formats': formats,
'subtitles': subtitles,
}
@@ -1070,7 +1105,7 @@ class InfoExtractor(object):
if not src:
continue
- bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
filesize = int_or_none(video.get('size') or video.get('fileSize'))
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
@@ -1102,8 +1137,10 @@ class InfoExtractor(object):
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
if proto == 'm3u8' or src_ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src_url, video_id, ext or 'mp4', m3u8_id='hls'))
+ m3u8_formats = self._extract_m3u8_formats(
+ src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
continue
if src_ext == 'f4m':
@@ -1118,7 +1155,7 @@ class InfoExtractor(object):
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
continue
- if src_url.startswith('http'):
+ if src_url.startswith('http') and self._is_valid_url(src, video_id):
http_count += 1
formats.append({
'url': src_url,
@@ -1289,11 +1326,11 @@ class InfoExtractor(object):
return ret
@classmethod
- def _merge_subtitles(kls, subtitle_dict1, subtitle_dict2):
+ def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
""" Merge two subtitle dictionaries, language by language. """
ret = dict(subtitle_dict1)
for lang in subtitle_dict2:
- ret[lang] = kls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+ ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
return ret
def extract_automatic_captions(self, *args, **kwargs):