RegexNotFoundError,
sanitize_filename,
unescapeHTML,
+ unified_strdate,
url_basename,
xpath_text,
xpath_with_ns,
description: Full video description.
uploader: Full name of the video uploader.
creator: The main artist who created the video.
+ release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
expected=True)
+ @staticmethod
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
+ raise ExtractorError(
+ '%s. You might want to use --proxy to workaround.' % msg,
+ expected=True)
+
# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None, video_title=None):
@staticmethod
def _hidden_inputs(html):
+ html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
hidden_inputs = {}
- for input in re.findall(r'<input([^>]+)>', html):
+ for input in re.findall(r'(?i)<input([^>]+)>', html):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
continue
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
def _form_hidden_inputs(self, form_id, html):
form = self._search_regex(
- r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)
video_id = os.path.splitext(url_basename(smil_url))[0]
title = None
description = None
+ upload_date = None
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
name = meta.attrib.get('name')
content = meta.attrib.get('content')
title = content
elif not description and name in ('description', 'abstract'):
description = content
+ elif not upload_date and name == 'date':
+ upload_date = unified_strdate(content)
+
+ thumbnails = [{
+ 'id': image.get('type'),
+ 'url': image.get('src'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
return {
'id': video_id,
'title': title or video_id,
'description': description,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
'formats': formats,
'subtitles': subtitles,
}
if not src:
continue
- bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
filesize = int_or_none(video.get('size') or video.get('fileSize'))
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
continue
- if src_url.startswith('http'):
+ if src_url.startswith('http') and self._is_valid_url(src, video_id):
http_count += 1
formats.append({
'url': src_url,